[flang-commits] [flang] 0b3f9d8 - [flang]: This is to fix the HLFIR path for PPC Vector type intrinsics. (#66547)

via flang-commits flang-commits at lists.llvm.org
Tue Sep 26 07:36:20 PDT 2023


Author: Daniel Chen
Date: 2023-09-26T10:36:13-04:00
New Revision: 0b3f9d8561226e3771db7f49dfb43d1301efc3c3

URL: https://github.com/llvm/llvm-project/commit/0b3f9d8561226e3771db7f49dfb43d1301efc3c3
DIFF: https://github.com/llvm/llvm-project/commit/0b3f9d8561226e3771db7f49dfb43d1301efc3c3.diff

LOG: [flang]: This is to fix the HLFIR path for PPC Vector type intrinsics. (#66547)

PowerPC vector type intrinsics currently crash when
`-flang-experimental-hlfir` is specified (see the minimal reproducer sketch
after the change list below).

This patch fixes the HLFIR path for PowerPC vector type intrinsics.

The patch:
1. Added the `-flang-experimental-hlfir` option to all PowerPC vector intrinsic tests.
2. Removed the FIR/MLIR checks to reduce maintenance cost.
3. Fixed a few IR checks to account for non-functional differences in the LLVM IR generated on the HLFIR path.
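
A minimal reproducer sketch (hypothetical; not taken from the patch or its
tests) of the kind of code affected, compiled for PowerPC with
`%flang_fc1 -flang-experimental-hlfir -emit-llvm`:

    ! Hypothetical reproducer: calling a PowerPC vector intrinsic used to
    ! crash the compiler when -flang-experimental-hlfir was specified.
    subroutine repro(x, y)
      vector(real(4)) :: x, y, vsum
      vsum = vec_add(x, y)  ! expected to lower to a vector fadd in LLVM IR
    end subroutine repro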

Added: 
    flang/test/Lower/PowerPC/ppc-vec-abs.f90
    flang/test/Lower/PowerPC/ppc-vec-add-and-mul-sub-xor.f90
    flang/test/Lower/PowerPC/ppc-vec-any.f90
    flang/test/Lower/PowerPC/ppc-vec-cmp.f90
    flang/test/Lower/PowerPC/ppc-vec-cvf-elem-order.f90
    flang/test/Lower/PowerPC/ppc-vec-max-min-madd-nmsub.f90
    flang/test/Lower/PowerPC/ppc-vec-sel.f90

Modified: 
    flang/include/flang/Optimizer/Dialect/FIRType.h
    flang/lib/Lower/ConvertCall.cpp
    flang/lib/Optimizer/HLFIR/IR/HLFIRDialect.cpp
    flang/test/Lower/PowerPC/ppc-intrinsics.f90
    flang/test/Lower/PowerPC/ppc-mma-accumulator-move-clear.f90
    flang/test/Lower/PowerPC/ppc-mma-assemble-disassemble.f90
    flang/test/Lower/PowerPC/ppc-mma-outer-product-1.f90
    flang/test/Lower/PowerPC/ppc-mma-outer-product-2.f90
    flang/test/Lower/PowerPC/ppc-pwr10-vec-intrinsics.f90
    flang/test/Lower/PowerPC/ppc-vec-convert.f90
    flang/test/Lower/PowerPC/ppc-vec-extract-elem-order.f90
    flang/test/Lower/PowerPC/ppc-vec-extract.f90
    flang/test/Lower/PowerPC/ppc-vec-insert-elem-order.f90
    flang/test/Lower/PowerPC/ppc-vec-insert.f90
    flang/test/Lower/PowerPC/ppc-vec-load-elem-order.f90
    flang/test/Lower/PowerPC/ppc-vec-load.f90
    flang/test/Lower/PowerPC/ppc-vec-merge-elem-order.f90
    flang/test/Lower/PowerPC/ppc-vec-merge.f90
    flang/test/Lower/PowerPC/ppc-vec-perm-elem-order.f90
    flang/test/Lower/PowerPC/ppc-vec-perm.f90
    flang/test/Lower/PowerPC/ppc-vec-shift-be-le.f90
    flang/test/Lower/PowerPC/ppc-vec-shift.f90
    flang/test/Lower/PowerPC/ppc-vec-splat-elem-order.f90
    flang/test/Lower/PowerPC/ppc-vec-splat.f90
    flang/test/Lower/PowerPC/ppc-vec-store-elem-order.f90
    flang/test/Lower/PowerPC/ppc-vec-store.f90
    flang/test/Lower/PowerPC/ppc-vector-types.f90

Removed: 
    flang/test/Lower/PowerPC/ppc-vec_abs.f90
    flang/test/Lower/PowerPC/ppc-vec_add-and-mul-sub-xor.f90
    flang/test/Lower/PowerPC/ppc-vec_any.f90
    flang/test/Lower/PowerPC/ppc-vec_cmp.f90
    flang/test/Lower/PowerPC/ppc-vec_cvf-elem-order.f90
    flang/test/Lower/PowerPC/ppc-vec_max-min-madd-nmsub.f90
    flang/test/Lower/PowerPC/ppc-vec_sel.f90


################################################################################
diff  --git a/flang/include/flang/Optimizer/Dialect/FIRType.h b/flang/include/flang/Optimizer/Dialect/FIRType.h
index bbc862483aea6da..77807ea2a308c67 100644
--- a/flang/include/flang/Optimizer/Dialect/FIRType.h
+++ b/flang/include/flang/Optimizer/Dialect/FIRType.h
@@ -146,6 +146,11 @@ inline bool isa_integer(mlir::Type t) {
   return t.isa<mlir::IndexType, mlir::IntegerType, fir::IntegerType>();
 }
 
+/// Is `t` a vector type?
+inline bool isa_vector(mlir::Type t) {
+  return t.isa<mlir::VectorType, fir::VectorType>();
+}
+
 mlir::Type parseFirType(FIROpsDialect *, mlir::DialectAsmParser &parser);
 
 void printFirType(FIROpsDialect *, mlir::Type ty, mlir::DialectAsmPrinter &p);
@@ -165,7 +170,7 @@ inline bool isa_char(mlir::Type t) { return t.isa<fir::CharacterType>(); }
 /// Is `t` a trivial intrinsic type? CHARACTER is excluded because it
 /// is a dependent type.
 inline bool isa_trivial(mlir::Type t) {
-  return isa_integer(t) || isa_real(t) || isa_complex(t) ||
+  return isa_integer(t) || isa_real(t) || isa_complex(t) || isa_vector(t) ||
          t.isa<fir::LogicalType>();
 }
 

diff  --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp
index f69144b5de2fbd9..0510965a596fb05 100644
--- a/flang/lib/Lower/ConvertCall.cpp
+++ b/flang/lib/Lower/ConvertCall.cpp
@@ -1515,8 +1515,8 @@ genIntrinsicRefCore(Fortran::lower::PreparedActualArguments &loweredActuals,
     scalarResultType = hlfir::getFortranElementType(*callContext.resultType);
   const std::string intrinsicName = callContext.getProcedureName();
   // Let the intrinsic library lower the intrinsic procedure call.
-  auto [resultExv, mustBeFreed] =
-      genIntrinsicCall(builder, loc, intrinsicName, scalarResultType, operands);
+  auto [resultExv, mustBeFreed] = genIntrinsicCall(
+      builder, loc, intrinsicName, scalarResultType, operands, &converter);
   for (const hlfir::CleanupFunction &fn : cleanupFns)
     fn();
   if (!fir::getBase(resultExv))

diff  --git a/flang/lib/Optimizer/HLFIR/IR/HLFIRDialect.cpp b/flang/lib/Optimizer/HLFIR/IR/HLFIRDialect.cpp
index d3a6fb305c19919..08b2b0538c732a2 100644
--- a/flang/lib/Optimizer/HLFIR/IR/HLFIRDialect.cpp
+++ b/flang/lib/Optimizer/HLFIR/IR/HLFIRDialect.cpp
@@ -87,6 +87,7 @@ bool hlfir::isFortranVariableType(mlir::Type type) {
         return eleType.isa<fir::BaseBoxType>() || !fir::hasDynamicSize(eleType);
       })
       .Case<fir::BaseBoxType, fir::BoxCharType>([](auto) { return true; })
+      .Case<fir::VectorType>([](auto) { return true; })
       .Default([](mlir::Type) { return false; });
 }
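
Together with the `isa_trivial`/`isa_vector` change above, this makes
`fir::VectorType` a recognized Fortran variable type, presumably so that
vector values pass HLFIR variable verification. A hedged Fortran sketch
(hypothetical, not from the patch) of a declaration this enables:

    ! Hypothetical example: a plain vector declaration and assignment that
    ! exercises the HLFIR variable machinery.
    subroutine vec_copy(v)
      vector(integer(4)) :: v, w
      w = v
    end subroutine vec_copy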
 

diff  --git a/flang/test/Lower/PowerPC/ppc-intrinsics.f90 b/flang/test/Lower/PowerPC/ppc-intrinsics.f90
index 6d6a8ef147520f3..809fd184bdaf463 100644
--- a/flang/test/Lower/PowerPC/ppc-intrinsics.f90
+++ b/flang/test/Lower/PowerPC/ppc-intrinsics.f90
@@ -1,202 +1,176 @@
-! RUN: bbc -emit-fir %s -outline-intrinsics -o - | FileCheck --check-prefixes="CHECK-FIR" %s
-! RUN: %flang_fc1 -emit-llvm %s -o - | FileCheck --check-prefixes="CHECK-LLVMIR" %s
+! RUN: %flang_fc1 -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes="LLVMIR" %s
 ! REQUIRES: target=powerpc{{.*}}
 
 ! CHECK-LABEL: fmadd_testr
 subroutine fmadd_testr(a, x, y)
   real :: a, x, y, z
   z = fmadd(a, x, y)
-! CHECK-FIR: fir.call @fir.__ppc_fmadd.contract.f32.f32.f32.f32
-! CHECK-LLVMIR: call contract float @llvm.fma.f32(float %{{[0-9]}}, float %{{[0-9]}}, float %{{[0-9]}})
+! LLVMIR: call contract float @llvm.fma.f32(float %{{[0-9]}}, float %{{[0-9]}}, float %{{[0-9]}})
 end
 
 ! CHECK-LABEL: fmadd_testd
 subroutine fmadd_testd(a, x, y)
   real(8) :: a, x, y, z
   z = fmadd(a, x, y)
-! CHECK-FIR: fir.call @fir.__ppc_fmadd.contract.f64.f64.f64.f64
-! CHECK-LLVMIR: call contract double @llvm.fma.f64(double %{{[0-9]}}, double %{{[0-9]}}, double %{{[0-9]}})
+! LLVMIR: call contract double @llvm.fma.f64(double %{{[0-9]}}, double %{{[0-9]}}, double %{{[0-9]}})
 end
 
 ! CHECK-LABEL: fnmadd_testr
 subroutine fnmadd_testr(a, x, y)
   real :: a, x, y, z
   z = fnmadd(a, x, y)
-! CHECK-FIR: fir.call @fir.__ppc_fnmadd.contract.f32.f32.f32.f32
-! CHECK-LLVMIR: call contract float @llvm.ppc.fnmadds(float %{{[0-9]}}, float %{{[0-9]}}, float %{{[0-9]}})
+! LLVMIR: call contract float @llvm.ppc.fnmadds(float %{{[0-9]}}, float %{{[0-9]}}, float %{{[0-9]}})
 end
 
 ! CHECK-LABEL: fnmadd_testd
 subroutine fnmadd_testd(a, x, y)
   real(8) :: a, x, y, z
   z = fnmadd(a, x, y)
-! CHECK-FIR: fir.call @fir.__ppc_fnmadd.contract.f64.f64.f64.f64
-! CHECK-LLVMIR: call contract double @llvm.ppc.fnmadd(double %{{[0-9]}}, double %{{[0-9]}}, double %{{[0-9]}})
+! LLVMIR: call contract double @llvm.ppc.fnmadd(double %{{[0-9]}}, double %{{[0-9]}}, double %{{[0-9]}})
 end
 
 ! CHECK-LABEL: fmsub_testr
 subroutine fmsub_testr(a, x, y)
   real :: a, x, y, z
   z = fmsub(a, x, y)
-! CHECK-FIR: fir.call @fir.__ppc_fmsub.contract.f32.f32.f32.f32
-! CHECK-LLVMIR: call contract float @llvm.ppc.fmsubs(float %{{[0-9]}}, float %{{[0-9]}}, float %{{[0-9]}})
+! LLVMIR: call contract float @llvm.ppc.fmsubs(float %{{[0-9]}}, float %{{[0-9]}}, float %{{[0-9]}})
 end
 
 ! CHECK-LABEL: fmsub_testd
 subroutine fmsub_testd(a, x, y)
   real(8) :: a, x, y, z
   z = fmsub(a, x, y)
-! CHECK-FIR: fir.call @fir.__ppc_fmsub.contract.f64.f64.f64.f64
-! CHECK-LLVMIR: call contract double @llvm.ppc.fmsub(double %{{[0-9]}}, double %{{[0-9]}}, double %{{[0-9]}})
+! LLVMIR: call contract double @llvm.ppc.fmsub(double %{{[0-9]}}, double %{{[0-9]}}, double %{{[0-9]}})
 end
 
 ! CHECK-LABEL: fnmsub_testr
 subroutine fnmsub_testr(a, x, y)
   real :: a, x, y, z
   z = fnmsub(a, x, y)
-! CHECK-FIR: fir.call @fir.__ppc_fnmsub.contract.f32.f32.f32.f32
-! CHECK-LLVMIR: call contract float @llvm.ppc.fnmsub.f32(float %{{[0-9]}}, float %{{[0-9]}}, float %{{[0-9]}})
+! LLVMIR: call contract float @llvm.ppc.fnmsub.f32(float %{{[0-9]}}, float %{{[0-9]}}, float %{{[0-9]}})
 end
 
 ! CHECK-LABEL: fnmsub_testd
 subroutine fnmsub_testd(a, x, y)
   real(8) :: a, x, y, z
   z = fnmsub(a, x, y)
-! CHECK-FIR: fir.call @fir.__ppc_fnmsub.contract.f64.f64.f64.f64
-! CHECK-LLVMIR: call contract double @llvm.ppc.fnmsub.f64(double %{{[0-9]}}, double %{{[0-9]}}, double %{{[0-9]}})
+! LLVMIR: call contract double @llvm.ppc.fnmsub.f64(double %{{[0-9]}}, double %{{[0-9]}}, double %{{[0-9]}})
 end
 
 ! CHECK-LABEL: fctid_test
 subroutine fctid_test(x)
   real(8) :: x, r
   r = fctid(x)
-! CHECK-FIR: fir.call @fir.__ppc_fctid.contract.f64.f64
-! CHECK-LLVMIR: call contract double @llvm.ppc.fctid(double %{{[0-9]}})
+! LLVMIR: call contract double @llvm.ppc.fctid(double %{{[0-9]}})
 end
 
 ! CHECK-LABEL: fctidz_test
 subroutine fctidz_test(x)
   real(8) :: x, r
   r = fctidz(x)
-! CHECK-FIR: fir.call @fir.__ppc_fctidz.contract.f64.f64
-! CHECK-LLVMIR: call contract double @llvm.ppc.fctidz(double %{{[0-9]}})
+! LLVMIR: call contract double @llvm.ppc.fctidz(double %{{[0-9]}})
 end
 
 ! CHECK-LABEL: fctiw_test
 subroutine fctiw_test(x)
   real(8) :: x, r
   r = fctiw(x)
-! CHECK-FIR: fir.call @fir.__ppc_fctiw.contract.f64.f64
-! CHECK-LLVMIR: call contract double @llvm.ppc.fctiw(double %{{[0-9]}})
+! LLVMIR: call contract double @llvm.ppc.fctiw(double %{{[0-9]}})
 end
 
 ! CHECK-LABEL: fctiwz_test
 subroutine fctiwz_test(x)
   real(8) :: x, r
   r = fctiwz(x)
-! CHECK-FIR: fir.call @fir.__ppc_fctiwz.contract.f64.f64
-! CHECK-LLVMIR: call contract double @llvm.ppc.fctiwz(double %{{[0-9]}})
+! LLVMIR: call contract double @llvm.ppc.fctiwz(double %{{[0-9]}})
 end
 
 ! CHECK-LABEL: fctudz_test
 subroutine fctudz_test(x)
   real(8) :: x, r
   r = fctudz(x)
-! CHECK-FIR: fir.call @fir.__ppc_fctudz.contract.f64.f64
-! CHECK-LLVMIR: call contract double @llvm.ppc.fctudz(double %{{[0-9]}})
+! LLVMIR: call contract double @llvm.ppc.fctudz(double %{{[0-9]}})
 end
 
 ! CHECK-LABEL: fctuwz_test
 subroutine fctuwz_test(x)
   real(8) :: x, r
   r = fctuwz(x)
-! CHECK-FIR: fir.call @fir.__ppc_fctuwz.contract.f64.f64
-! CHECK-LLVMIR: call contract double @llvm.ppc.fctuwz(double %{{[0-9]}})
+! LLVMIR: call contract double @llvm.ppc.fctuwz(double %{{[0-9]}})
 end
 
 ! CHECK-LABEL: fcfi_test
 subroutine fcfi_test(i)
   real(8) :: i, r
   r = fcfi(i)
-! CHECK-FIR: fir.call @fir.__ppc_fcfi.contract.f64.f64
-! CHECK-LLVMIR: call contract double @llvm.ppc.fcfid(double %{{[0-9]}})
+! LLVMIR: call contract double @llvm.ppc.fcfid(double %{{[0-9]}})
 end
 
 ! CHECK-LABEL: fcfid_test
 subroutine fcfid_test(i)
   real(8) :: i, r
   r = fcfid(i)
-! CHECK-FIR: fir.call @fir.__ppc_fcfid.contract.f64.f64
-! CHECK-LLVMIR: call contract double @llvm.ppc.fcfid(double %{{[0-9]}})
+! LLVMIR: call contract double @llvm.ppc.fcfid(double %{{[0-9]}})
 end
 
 ! CHECK-LABEL: fcfud_test
 subroutine fcfud_test(i)
   real(8) :: i, r
   r = fcfud(i)
-! CHECK-FIR: fir.call @fir.__ppc_fcfud.contract.f64.f64
-! CHECK-LLVMIR: call contract double @llvm.ppc.fcfud(double %{{[0-9]}})
+! LLVMIR: call contract double @llvm.ppc.fcfud(double %{{[0-9]}})
 end
 
 ! CHECK-LABEL: fnabs_testr(x)
 subroutine fnabs_testr(x)
   real :: x, y
   y = fnabs(x)
-! CHECK-FIR: fir.call @fir.__ppc_fnabs.contract.f32.f32
-! CHECK-LLVMIR: call contract float @llvm.ppc.fnabss(float %{{[0-9]}})
+! LLVMIR: call contract float @llvm.ppc.fnabss(float %{{[0-9]}})
 end
 
 ! CHECK-LABEL: fnabs_testd(x)
 subroutine fnabs_testd(x)
   real(8) :: x, y
   y = fnabs(x)
-! CHECK-FIR: fir.call @fir.__ppc_fnabs.contract.f64.f64
-! CHECK-LLVMIR: call contract double @llvm.ppc.fnabs(double %{{[0-9]}})
+! LLVMIR: call contract double @llvm.ppc.fnabs(double %{{[0-9]}})
 end
 
 !CHECK-LABEL: fre_test(x)
 subroutine fre_test(x)
   real(8) :: x, y
   y = fre(x)
-! CHECK-FIR: fir.call @fir.__ppc_fre.contract.f64.f64
-! CHECK-LLVMIR: call contract double @llvm.ppc.fre(double %{{[0-9]}})
+! LLVMIR: call contract double @llvm.ppc.fre(double %{{[0-9]}})
 end
 
 !CHECK-LABEL: fres_test(x)
 subroutine fres_test(x)
   real :: x, y
   y = fres(x)
-! CHECK-FIR: fir.call @fir.__ppc_fres.contract.f32.f32
-! CHECK-LLVMIR: call contract float @llvm.ppc.fres(float %{{[0-9]}})
+! LLVMIR: call contract float @llvm.ppc.fres(float %{{[0-9]}})
 end
 
 !CHECK-LABEL: frsqrte_test(x)
 subroutine frsqrte_test(x)
   real(8) :: x, y
   y = frsqrte(x)
-! CHECK-FIR: fir.call @fir.__ppc_frsqrte.contract.f64.f64
-! CHECK-LLVMIR: call contract double @llvm.ppc.frsqrte(double %{{[0-9]}})
+! LLVMIR: call contract double @llvm.ppc.frsqrte(double %{{[0-9]}})
 end
 
 !CHECK-LABEL: frsqrtes_test(x)
 subroutine frsqrtes_test(x)
   real :: x, y
   y = frsqrtes(x)
-! CHECK-FIR: fir.call @fir.__ppc_frsqrtes.contract.f32.f32
-! CHECK-LLVMIR: call contract float @llvm.ppc.frsqrtes(float %{{[0-9]}})
+! LLVMIR: call contract float @llvm.ppc.frsqrtes(float %{{[0-9]}})
 end
 
 ! CHECK-LABEL: mtfsf_test
 subroutine mtfsf_test(r)
   real(8) :: r
   call mtfsf(1, r)
-! CHECK-FIR: fir.call @fir.__ppc_mtfsf.void.i32.f64
-! CHECK-LLVMIR: call void @llvm.ppc.mtfsf(i32 {{[0-9]}}, double %{{[0-9]}})
+! LLVMIR: call void @llvm.ppc.mtfsf(i32 {{[0-9]}}, double %{{[0-9]}})
 end
 
 ! CHECK-LABEL: mtfsfi_test
 subroutine mtfsfi_test()
   call mtfsfi(1, 2)
-! CHECK-FIR: fir.call @fir.__ppc_mtfsfi.void.i32.i32
-! CHECK-LLVMIR: call void @llvm.ppc.mtfsfi(i32 {{[0-9]}}, i32 {{[0-9]}})
+! LLVMIR: call void @llvm.ppc.mtfsfi(i32 {{[0-9]}}, i32 {{[0-9]}})
 end

diff  --git a/flang/test/Lower/PowerPC/ppc-mma-accumulator-move-clear.f90 b/flang/test/Lower/PowerPC/ppc-mma-accumulator-move-clear.f90
index cc9689b70343c61..ce07f202a944d37 100644
--- a/flang/test/Lower/PowerPC/ppc-mma-accumulator-move-clear.f90
+++ b/flang/test/Lower/PowerPC/ppc-mma-accumulator-move-clear.f90
@@ -1,4 +1,4 @@
-! RUN: %flang_fc1 -triple powerpc64le-unknown-unknown -target-cpu pwr10 -emit-llvm %s -o - | FileCheck --check-prefixes="CHECK" %s
+! RUN: %flang_fc1 -flang-experimental-hlfir -triple powerpc64le-unknown-unknown -target-cpu pwr10 -emit-llvm %s -o - | FileCheck --check-prefixes="LLVMIR" %s
 ! REQUIRES: target=powerpc{{.*}}
 
       subroutine test_xxmfacc()
@@ -9,10 +9,10 @@ subroutine test_xxmfacc()
       end subroutine test_xxmfacc
 
 !CHECK-LABEL: @test_xxmfacc_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %3 = call <512 x i1> @llvm.ppc.mma.xxmfacc(<512 x i1> %2)
-!CHECK:  store <512 x i1> %3, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %3 = call <512 x i1> @llvm.ppc.mma.xxmfacc(<512 x i1> %2)
+!LLVMIR:  store <512 x i1> %3, ptr %1, align 64
 
       subroutine test_xxmtacc()
       use, intrinsic :: mma
@@ -22,10 +22,10 @@ subroutine test_xxmtacc()
       end subroutine test_xxmtacc
 
 !CHECK-LABEL: @test_xxmtacc_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %3 = call <512 x i1> @llvm.ppc.mma.xxmtacc(<512 x i1> %2)
-!CHECK:  store <512 x i1> %3, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %3 = call <512 x i1> @llvm.ppc.mma.xxmtacc(<512 x i1> %2)
+!LLVMIR:  store <512 x i1> %3, ptr %1, align 64
 
       subroutine test_xxsetaccz()
       use, intrinsic :: mma
@@ -35,6 +35,6 @@ subroutine test_xxsetaccz()
       end subroutine test_xxsetaccz
 
 !CHECK-LABEL: @test_xxsetaccz_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = call <512 x i1> @llvm.ppc.mma.xxsetaccz()
-!CHECK:  store <512 x i1> %2, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = call <512 x i1> @llvm.ppc.mma.xxsetaccz()
+!LLVMIR:  store <512 x i1> %2, ptr %1, align 64

diff  --git a/flang/test/Lower/PowerPC/ppc-mma-assemble-disassemble.f90 b/flang/test/Lower/PowerPC/ppc-mma-assemble-disassemble.f90
index 1ae6c5305345f36..d3872891853d427 100644
--- a/flang/test/Lower/PowerPC/ppc-mma-assemble-disassemble.f90
+++ b/flang/test/Lower/PowerPC/ppc-mma-assemble-disassemble.f90
@@ -1,4 +1,4 @@
-! RUN: %flang_fc1 -triple powerpc64le-unknown-unknown -target-cpu pwr10 -emit-llvm %s -o - | FileCheck --check-prefixes="CHECK" %s
+! RUN: %flang_fc1 -flang-experimental-hlfir -triple powerpc64le-unknown-unknown -target-cpu pwr10 -emit-llvm %s -o - | FileCheck --check-prefixes="LLVMIR" %s
 ! REQUIRES: target=powerpc{{.*}}
 
 ! mma_assemble_acc
@@ -12,17 +12,17 @@ subroutine test_assemble_acc_i1()
       end subroutine test_assemble_acc_i1
 
 ! CHECK-LABEL: @test_assemble_acc_i1
-! CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-! CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-! CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-! CHECK:  %4 = alloca <16 x i8>, i64 1, align 16
-! CHECK:  %5 = alloca <16 x i8>, i64 1, align 16
-! CHECK:  %6 = load <16 x i8>, ptr %2, align 16
-! CHECK:  %7 = load <16 x i8>, ptr %3, align 16
-! CHECK:  %8 = load <16 x i8>, ptr %4, align 16
-! CHECK:  %9 = load <16 x i8>, ptr %5, align 16
-! CHECK:  %10 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %6, <16 x i8> %7, <16 x i8> %8, <16 x i8> %9)
-! CHECK:  store <512 x i1> %10, ptr %1, align 64
+! LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+! LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+! LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+! LLVMIR:  %4 = alloca <16 x i8>, i64 1, align 16
+! LLVMIR:  %5 = alloca <16 x i8>, i64 1, align 16
+! LLVMIR:  %6 = load <16 x i8>, ptr %2, align 16
+! LLVMIR:  %7 = load <16 x i8>, ptr %3, align 16
+! LLVMIR:  %8 = load <16 x i8>, ptr %4, align 16
+! LLVMIR:  %9 = load <16 x i8>, ptr %5, align 16
+! LLVMIR:  %10 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %6, <16 x i8> %7, <16 x i8> %8, <16 x i8> %9)
+! LLVMIR:  store <512 x i1> %10, ptr %1, align 64
 
       subroutine test_assemble_acc_i2()
       use, intrinsic :: mma
@@ -33,21 +33,21 @@ subroutine test_assemble_acc_i2()
       end subroutine test_assemble_acc_i2
 
 ! CHECK-LABEL: @test_assemble_acc_i2
-! CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-! CHECK:  %2 = alloca <8 x i16>, i64 1, align 16
-! CHECK:  %3 = alloca <8 x i16>, i64 1, align 16
-! CHECK:  %4 = alloca <8 x i16>, i64 1, align 16
-! CHECK:  %5 = alloca <8 x i16>, i64 1, align 16
-! CHECK:  %6 = load <8 x i16>, ptr %2, align 16
-! CHECK:  %7 = load <8 x i16>, ptr %3, align 16
-! CHECK:  %8 = load <8 x i16>, ptr %4, align 16
-! CHECK:  %9 = load <8 x i16>, ptr %5, align 16
-! CHECK:  %10 = bitcast <8 x i16> %6 to <16 x i8>
-! CHECK:  %11 = bitcast <8 x i16> %7 to <16 x i8>
-! CHECK:  %12 = bitcast <8 x i16> %8 to <16 x i8>
-! CHECK:  %13 = bitcast <8 x i16> %9 to <16 x i8>
-! CHECK:  %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13)
-! CHECK:  store <512 x i1> %14, ptr %1, align 64
+! LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+! LLVMIR:  %2 = alloca <8 x i16>, i64 1, align 16
+! LLVMIR:  %3 = alloca <8 x i16>, i64 1, align 16
+! LLVMIR:  %4 = alloca <8 x i16>, i64 1, align 16
+! LLVMIR:  %5 = alloca <8 x i16>, i64 1, align 16
+! LLVMIR:  %6 = load <8 x i16>, ptr %2, align 16
+! LLVMIR:  %7 = load <8 x i16>, ptr %3, align 16
+! LLVMIR:  %8 = load <8 x i16>, ptr %4, align 16
+! LLVMIR:  %9 = load <8 x i16>, ptr %5, align 16
+! LLVMIR:  %10 = bitcast <8 x i16> %6 to <16 x i8>
+! LLVMIR:  %11 = bitcast <8 x i16> %7 to <16 x i8>
+! LLVMIR:  %12 = bitcast <8 x i16> %8 to <16 x i8>
+! LLVMIR:  %13 = bitcast <8 x i16> %9 to <16 x i8>
+! LLVMIR:  %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13)
+! LLVMIR:  store <512 x i1> %14, ptr %1, align 64
 
 
       subroutine test_assemble_acc_i4()
@@ -59,21 +59,21 @@ subroutine test_assemble_acc_i4()
       end subroutine test_assemble_acc_i4
 
 ! CHECK-LABEL: @test_assemble_acc_i4
-! CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-! CHECK:  %2 = alloca <4 x i32>, i64 1, align 16
-! CHECK:  %3 = alloca <4 x i32>, i64 1, align 16
-! CHECK:  %4 = alloca <4 x i32>, i64 1, align 16
-! CHECK:  %5 = alloca <4 x i32>, i64 1, align 16
-! CHECK:  %6 = load <4 x i32>, ptr %2, align 16
-! CHECK:  %7 = load <4 x i32>, ptr %3, align 16
-! CHECK:  %8 = load <4 x i32>, ptr %4, align 16
-! CHECK:  %9 = load <4 x i32>, ptr %5, align 16
-! CHECK:  %10 = bitcast <4 x i32> %6 to <16 x i8>
-! CHECK:  %11 = bitcast <4 x i32> %7 to <16 x i8>
-! CHECK:  %12 = bitcast <4 x i32> %8 to <16 x i8>
-! CHECK:  %13 = bitcast <4 x i32> %9 to <16 x i8>
-! CHECK:  %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13)
-! CHECK:  store <512 x i1> %14, ptr %1, align 64
+! LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+! LLVMIR:  %2 = alloca <4 x i32>, i64 1, align 16
+! LLVMIR:  %3 = alloca <4 x i32>, i64 1, align 16
+! LLVMIR:  %4 = alloca <4 x i32>, i64 1, align 16
+! LLVMIR:  %5 = alloca <4 x i32>, i64 1, align 16
+! LLVMIR:  %6 = load <4 x i32>, ptr %2, align 16
+! LLVMIR:  %7 = load <4 x i32>, ptr %3, align 16
+! LLVMIR:  %8 = load <4 x i32>, ptr %4, align 16
+! LLVMIR:  %9 = load <4 x i32>, ptr %5, align 16
+! LLVMIR:  %10 = bitcast <4 x i32> %6 to <16 x i8>
+! LLVMIR:  %11 = bitcast <4 x i32> %7 to <16 x i8>
+! LLVMIR:  %12 = bitcast <4 x i32> %8 to <16 x i8>
+! LLVMIR:  %13 = bitcast <4 x i32> %9 to <16 x i8>
+! LLVMIR:  %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13)
+! LLVMIR:  store <512 x i1> %14, ptr %1, align 64
 
       subroutine test_assemble_acc_i8()
       use, intrinsic :: mma
@@ -84,21 +84,21 @@ subroutine test_assemble_acc_i8()
       end subroutine test_assemble_acc_i8
 
 ! CHECK-LABEL: @test_assemble_acc_i8
-! CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-! CHECK:  %2 = alloca <2 x i64>, i64 1, align 16
-! CHECK:  %3 = alloca <2 x i64>, i64 1, align 16
-! CHECK:  %4 = alloca <2 x i64>, i64 1, align 16
-! CHECK:  %5 = alloca <2 x i64>, i64 1, align 16
-! CHECK:  %6 = load <2 x i64>, ptr %2, align 16
-! CHECK:  %7 = load <2 x i64>, ptr %3, align 16
-! CHECK:  %8 = load <2 x i64>, ptr %4, align 16
-! CHECK:  %9 = load <2 x i64>, ptr %5, align 16
-! CHECK:  %10 = bitcast <2 x i64> %6 to <16 x i8>
-! CHECK:  %11 = bitcast <2 x i64> %7 to <16 x i8>
-! CHECK:  %12 = bitcast <2 x i64> %8 to <16 x i8>
-! CHECK:  %13 = bitcast <2 x i64> %9 to <16 x i8>
-! CHECK:  %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13)
-! CHECK:  store <512 x i1> %14, ptr %1, align 64
+! LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+! LLVMIR:  %2 = alloca <2 x i64>, i64 1, align 16
+! LLVMIR:  %3 = alloca <2 x i64>, i64 1, align 16
+! LLVMIR:  %4 = alloca <2 x i64>, i64 1, align 16
+! LLVMIR:  %5 = alloca <2 x i64>, i64 1, align 16
+! LLVMIR:  %6 = load <2 x i64>, ptr %2, align 16
+! LLVMIR:  %7 = load <2 x i64>, ptr %3, align 16
+! LLVMIR:  %8 = load <2 x i64>, ptr %4, align 16
+! LLVMIR:  %9 = load <2 x i64>, ptr %5, align 16
+! LLVMIR:  %10 = bitcast <2 x i64> %6 to <16 x i8>
+! LLVMIR:  %11 = bitcast <2 x i64> %7 to <16 x i8>
+! LLVMIR:  %12 = bitcast <2 x i64> %8 to <16 x i8>
+! LLVMIR:  %13 = bitcast <2 x i64> %9 to <16 x i8>
+! LLVMIR:  %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13)
+! LLVMIR:  store <512 x i1> %14, ptr %1, align 64
 
 
       subroutine test_assemble_acc_u1()
@@ -110,17 +110,17 @@ subroutine test_assemble_acc_u1()
       end subroutine test_assemble_acc_u1
 
 ! CHECK-LABEL: @test_assemble_acc_u1
-! CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-! CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-! CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-! CHECK:  %4 = alloca <16 x i8>, i64 1, align 16
-! CHECK:  %5 = alloca <16 x i8>, i64 1, align 16
-! CHECK:  %6 = load <16 x i8>, ptr %2, align 16
-! CHECK:  %7 = load <16 x i8>, ptr %3, align 16
-! CHECK:  %8 = load <16 x i8>, ptr %4, align 16
-! CHECK:  %9 = load <16 x i8>, ptr %5, align 16
-! CHECK:  %10 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %6, <16 x i8> %7, <16 x i8> %8, <16 x i8> %9)
-! CHECK:  store <512 x i1> %10, ptr %1, align 64
+! LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+! LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+! LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+! LLVMIR:  %4 = alloca <16 x i8>, i64 1, align 16
+! LLVMIR:  %5 = alloca <16 x i8>, i64 1, align 16
+! LLVMIR:  %6 = load <16 x i8>, ptr %2, align 16
+! LLVMIR:  %7 = load <16 x i8>, ptr %3, align 16
+! LLVMIR:  %8 = load <16 x i8>, ptr %4, align 16
+! LLVMIR:  %9 = load <16 x i8>, ptr %5, align 16
+! LLVMIR:  %10 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %6, <16 x i8> %7, <16 x i8> %8, <16 x i8> %9)
+! LLVMIR:  store <512 x i1> %10, ptr %1, align 64
 
       subroutine test_assemble_acc_u2()
       use, intrinsic :: mma
@@ -131,21 +131,21 @@ subroutine test_assemble_acc_u2()
       end subroutine test_assemble_acc_u2
 
 ! CHECK-LABEL: @test_assemble_acc_u2
-! CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-! CHECK:  %2 = alloca <8 x i16>, i64 1, align 16
-! CHECK:  %3 = alloca <8 x i16>, i64 1, align 16
-! CHECK:  %4 = alloca <8 x i16>, i64 1, align 16
-! CHECK:  %5 = alloca <8 x i16>, i64 1, align 16
-! CHECK:  %6 = load <8 x i16>, ptr %2, align 16
-! CHECK:  %7 = load <8 x i16>, ptr %3, align 16
-! CHECK:  %8 = load <8 x i16>, ptr %4, align 16
-! CHECK:  %9 = load <8 x i16>, ptr %5, align 16
-! CHECK:  %10 = bitcast <8 x i16> %6 to <16 x i8>
-! CHECK:  %11 = bitcast <8 x i16> %7 to <16 x i8>
-! CHECK:  %12 = bitcast <8 x i16> %8 to <16 x i8>
-! CHECK:  %13 = bitcast <8 x i16> %9 to <16 x i8>
-! CHECK:  %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13)
-! CHECK:  store <512 x i1> %14, ptr %1, align 64
+! LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+! LLVMIR:  %2 = alloca <8 x i16>, i64 1, align 16
+! LLVMIR:  %3 = alloca <8 x i16>, i64 1, align 16
+! LLVMIR:  %4 = alloca <8 x i16>, i64 1, align 16
+! LLVMIR:  %5 = alloca <8 x i16>, i64 1, align 16
+! LLVMIR:  %6 = load <8 x i16>, ptr %2, align 16
+! LLVMIR:  %7 = load <8 x i16>, ptr %3, align 16
+! LLVMIR:  %8 = load <8 x i16>, ptr %4, align 16
+! LLVMIR:  %9 = load <8 x i16>, ptr %5, align 16
+! LLVMIR:  %10 = bitcast <8 x i16> %6 to <16 x i8>
+! LLVMIR:  %11 = bitcast <8 x i16> %7 to <16 x i8>
+! LLVMIR:  %12 = bitcast <8 x i16> %8 to <16 x i8>
+! LLVMIR:  %13 = bitcast <8 x i16> %9 to <16 x i8>
+! LLVMIR:  %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13)
+! LLVMIR:  store <512 x i1> %14, ptr %1, align 64
 
       subroutine test_assemble_acc_u4()
       use, intrinsic :: mma
@@ -156,21 +156,21 @@ subroutine test_assemble_acc_u4()
       end subroutine test_assemble_acc_u4
 
 ! CHECK-LABEL: @test_assemble_acc_u4
-! CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-! CHECK:  %2 = alloca <4 x i32>, i64 1, align 16
-! CHECK:  %3 = alloca <4 x i32>, i64 1, align 16
-! CHECK:  %4 = alloca <4 x i32>, i64 1, align 16
-! CHECK:  %5 = alloca <4 x i32>, i64 1, align 16
-! CHECK:  %6 = load <4 x i32>, ptr %2, align 16
-! CHECK:  %7 = load <4 x i32>, ptr %3, align 16
-! CHECK:  %8 = load <4 x i32>, ptr %4, align 16
-! CHECK:  %9 = load <4 x i32>, ptr %5, align 16
-! CHECK:  %10 = bitcast <4 x i32> %6 to <16 x i8>
-! CHECK:  %11 = bitcast <4 x i32> %7 to <16 x i8>
-! CHECK:  %12 = bitcast <4 x i32> %8 to <16 x i8>
-! CHECK:  %13 = bitcast <4 x i32> %9 to <16 x i8>
-! CHECK:  %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13)
-! CHECK:  store <512 x i1> %14, ptr %1, align 64
+! LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+! LLVMIR:  %2 = alloca <4 x i32>, i64 1, align 16
+! LLVMIR:  %3 = alloca <4 x i32>, i64 1, align 16
+! LLVMIR:  %4 = alloca <4 x i32>, i64 1, align 16
+! LLVMIR:  %5 = alloca <4 x i32>, i64 1, align 16
+! LLVMIR:  %6 = load <4 x i32>, ptr %2, align 16
+! LLVMIR:  %7 = load <4 x i32>, ptr %3, align 16
+! LLVMIR:  %8 = load <4 x i32>, ptr %4, align 16
+! LLVMIR:  %9 = load <4 x i32>, ptr %5, align 16
+! LLVMIR:  %10 = bitcast <4 x i32> %6 to <16 x i8>
+! LLVMIR:  %11 = bitcast <4 x i32> %7 to <16 x i8>
+! LLVMIR:  %12 = bitcast <4 x i32> %8 to <16 x i8>
+! LLVMIR:  %13 = bitcast <4 x i32> %9 to <16 x i8>
+! LLVMIR:  %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13)
+! LLVMIR:  store <512 x i1> %14, ptr %1, align 64
 
       subroutine test_assemble_acc_u8()
       use, intrinsic :: mma
@@ -181,21 +181,21 @@ subroutine test_assemble_acc_u8()
       end subroutine test_assemble_acc_u8
 
 ! CHECK-LABEL: @test_assemble_acc_u8
-! CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-! CHECK:  %2 = alloca <2 x i64>, i64 1, align 16
-! CHECK:  %3 = alloca <2 x i64>, i64 1, align 16
-! CHECK:  %4 = alloca <2 x i64>, i64 1, align 16
-! CHECK:  %5 = alloca <2 x i64>, i64 1, align 16
-! CHECK:  %6 = load <2 x i64>, ptr %2, align 16
-! CHECK:  %7 = load <2 x i64>, ptr %3, align 16
-! CHECK:  %8 = load <2 x i64>, ptr %4, align 16
-! CHECK:  %9 = load <2 x i64>, ptr %5, align 16
-! CHECK:  %10 = bitcast <2 x i64> %6 to <16 x i8>
-! CHECK:  %11 = bitcast <2 x i64> %7 to <16 x i8>
-! CHECK:  %12 = bitcast <2 x i64> %8 to <16 x i8>
-! CHECK:  %13 = bitcast <2 x i64> %9 to <16 x i8>
-! CHECK:  %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13)
-! CHECK:  store <512 x i1> %14, ptr %1, align 64
+! LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+! LLVMIR:  %2 = alloca <2 x i64>, i64 1, align 16
+! LLVMIR:  %3 = alloca <2 x i64>, i64 1, align 16
+! LLVMIR:  %4 = alloca <2 x i64>, i64 1, align 16
+! LLVMIR:  %5 = alloca <2 x i64>, i64 1, align 16
+! LLVMIR:  %6 = load <2 x i64>, ptr %2, align 16
+! LLVMIR:  %7 = load <2 x i64>, ptr %3, align 16
+! LLVMIR:  %8 = load <2 x i64>, ptr %4, align 16
+! LLVMIR:  %9 = load <2 x i64>, ptr %5, align 16
+! LLVMIR:  %10 = bitcast <2 x i64> %6 to <16 x i8>
+! LLVMIR:  %11 = bitcast <2 x i64> %7 to <16 x i8>
+! LLVMIR:  %12 = bitcast <2 x i64> %8 to <16 x i8>
+! LLVMIR:  %13 = bitcast <2 x i64> %9 to <16 x i8>
+! LLVMIR:  %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13)
+! LLVMIR:  store <512 x i1> %14, ptr %1, align 64
 
       subroutine test_assemble_acc_r4()
       use, intrinsic :: mma
@@ -206,21 +206,21 @@ subroutine test_assemble_acc_r4()
       end subroutine test_assemble_acc_r4
 
 ! CHECK-LABEL: @test_assemble_acc_r4
-! CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-! CHECK:  %2 = alloca <4 x float>, i64 1, align 16
-! CHECK:  %3 = alloca <4 x float>, i64 1, align 16
-! CHECK:  %4 = alloca <4 x float>, i64 1, align 16
-! CHECK:  %5 = alloca <4 x float>, i64 1, align 16
-! CHECK:  %6 = load <4 x float>, ptr %2, align 16
-! CHECK:  %7 = load <4 x float>, ptr %3, align 16
-! CHECK:  %8 = load <4 x float>, ptr %4, align 16
-! CHECK:  %9 = load <4 x float>, ptr %5, align 16
-! CHECK:  %10 = bitcast <4 x float> %6 to <16 x i8>
-! CHECK:  %11 = bitcast <4 x float> %7 to <16 x i8>
-! CHECK:  %12 = bitcast <4 x float> %8 to <16 x i8>
-! CHECK:  %13 = bitcast <4 x float> %9 to <16 x i8>
-! CHECK:  %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13)
-! CHECK:  store <512 x i1> %14, ptr %1, align 64
+! LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+! LLVMIR:  %2 = alloca <4 x float>, i64 1, align 16
+! LLVMIR:  %3 = alloca <4 x float>, i64 1, align 16
+! LLVMIR:  %4 = alloca <4 x float>, i64 1, align 16
+! LLVMIR:  %5 = alloca <4 x float>, i64 1, align 16
+! LLVMIR:  %6 = load <4 x float>, ptr %2, align 16
+! LLVMIR:  %7 = load <4 x float>, ptr %3, align 16
+! LLVMIR:  %8 = load <4 x float>, ptr %4, align 16
+! LLVMIR:  %9 = load <4 x float>, ptr %5, align 16
+! LLVMIR:  %10 = bitcast <4 x float> %6 to <16 x i8>
+! LLVMIR:  %11 = bitcast <4 x float> %7 to <16 x i8>
+! LLVMIR:  %12 = bitcast <4 x float> %8 to <16 x i8>
+! LLVMIR:  %13 = bitcast <4 x float> %9 to <16 x i8>
+! LLVMIR:  %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13)
+! LLVMIR:  store <512 x i1> %14, ptr %1, align 64
 
       subroutine test_assemble_acc_r8()
       use, intrinsic :: mma
@@ -231,21 +231,21 @@ subroutine test_assemble_acc_r8()
       end subroutine test_assemble_acc_r8
 
 !CHECK-LABEL: @test_assemble_acc_r8
-!CHECK:   %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:   %2 = alloca <2 x double>, i64 1, align 16
-!CHECK:   %3 = alloca <2 x double>, i64 1, align 16
-!CHECK:   %4 = alloca <2 x double>, i64 1, align 16
-!CHECK:   %5 = alloca <2 x double>, i64 1, align 16
-!CHECK:   %6 = load <2 x double>, ptr %2, align 16
-!CHECK:   %7 = load <2 x double>, ptr %3, align 16
-!CHECK:   %8 = load <2 x double>, ptr %4, align 16
-!CHECK:   %9 = load <2 x double>, ptr %5, align 16
-!CHECK:   %10 = bitcast <2 x double> %6 to <16 x i8>
-!CHECK:   %11 = bitcast <2 x double> %7 to <16 x i8>
-!CHECK:   %12 = bitcast <2 x double> %8 to <16 x i8>
-!CHECK:   %13 = bitcast <2 x double> %9 to <16 x i8>
-!CHECK:   %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13)
-!CHECK:   store <512 x i1> %14, ptr %1, align 64
+!LLVMIR:   %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:   %2 = alloca <2 x double>, i64 1, align 16
+!LLVMIR:   %3 = alloca <2 x double>, i64 1, align 16
+!LLVMIR:   %4 = alloca <2 x double>, i64 1, align 16
+!LLVMIR:   %5 = alloca <2 x double>, i64 1, align 16
+!LLVMIR:   %6 = load <2 x double>, ptr %2, align 16
+!LLVMIR:   %7 = load <2 x double>, ptr %3, align 16
+!LLVMIR:   %8 = load <2 x double>, ptr %4, align 16
+!LLVMIR:   %9 = load <2 x double>, ptr %5, align 16
+!LLVMIR:   %10 = bitcast <2 x double> %6 to <16 x i8>
+!LLVMIR:   %11 = bitcast <2 x double> %7 to <16 x i8>
+!LLVMIR:   %12 = bitcast <2 x double> %8 to <16 x i8>
+!LLVMIR:   %13 = bitcast <2 x double> %9 to <16 x i8>
+!LLVMIR:   %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13)
+!LLVMIR:   store <512 x i1> %14, ptr %1, align 64
 
 ! mma_assemble_pair
 
@@ -257,14 +257,14 @@ subroutine test_mma_assemble_pair_i1()
       call mma_assemble_pair(vp, vi10, vi11)
       end subroutine test_mma_assemble_pair_i1
 
-!CHECK: @test_mma_assemble_pair_i1_
-!CHECK:  %1 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <256 x i1>, i64 1, align 32
-!CHECK:  %4 = load <16 x i8>, ptr %1, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %6 = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> %4, <16 x i8> %5)
-!CHECK:  store <256 x i1> %6, ptr %3, align 32
+!LLVMIR: @test_mma_assemble_pair_i1_
+!LLVMIR:  %1 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <256 x i1>, i64 1, align 32
+!LLVMIR:  %4 = load <16 x i8>, ptr %1, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %6 = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> %4, <16 x i8> %5)
+!LLVMIR:  store <256 x i1> %6, ptr %3, align 32
 
       subroutine test_mma_assemble_pair_i2()
       use, intrinsic :: mma
@@ -274,16 +274,16 @@ subroutine test_mma_assemble_pair_i2()
       call mma_assemble_pair(vp, vi10, vi11)
       end subroutine test_mma_assemble_pair_i2
 
-!CHECK: @test_mma_assemble_pair_i2_
-!CHECK:  %1 = alloca <8 x i16>, i64 1, align 16
-!CHECK:  %2 = alloca <8 x i16>, i64 1, align 16
-!CHECK:  %3 = alloca <256 x i1>, i64 1, align 32
-!CHECK:  %4 = load <8 x i16>, ptr %1, align 16
-!CHECK:  %5 = load <8 x i16>, ptr %2, align 16
-!CHECK:  %6 = bitcast <8 x i16> %4 to <16 x i8>
-!CHECK:  %7 = bitcast <8 x i16> %5 to <16 x i8>
-!CHECK:  %8 = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> %6, <16 x i8> %7)
-!CHECK:  store <256 x i1> %8, ptr %3, align 32
+!LLVMIR: @test_mma_assemble_pair_i2_
+!LLVMIR:  %1 = alloca <8 x i16>, i64 1, align 16
+!LLVMIR:  %2 = alloca <8 x i16>, i64 1, align 16
+!LLVMIR:  %3 = alloca <256 x i1>, i64 1, align 32
+!LLVMIR:  %4 = load <8 x i16>, ptr %1, align 16
+!LLVMIR:  %5 = load <8 x i16>, ptr %2, align 16
+!LLVMIR:  %6 = bitcast <8 x i16> %4 to <16 x i8>
+!LLVMIR:  %7 = bitcast <8 x i16> %5 to <16 x i8>
+!LLVMIR:  %8 = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> %6, <16 x i8> %7)
+!LLVMIR:  store <256 x i1> %8, ptr %3, align 32
 
       subroutine test_mma_assemble_pair_i4()
       use, intrinsic :: mma
@@ -293,16 +293,16 @@ subroutine test_mma_assemble_pair_i4()
       call mma_assemble_pair(vp, vi10, vi11)
       end subroutine test_mma_assemble_pair_i4
 
-!CHECK: @test_mma_assemble_pair_i4_
-!CHECK:  %1 = alloca <4 x i32>, i64 1, align 16
-!CHECK:  %2 = alloca <4 x i32>, i64 1, align 16
-!CHECK:  %3 = alloca <256 x i1>, i64 1, align 32
-!CHECK:  %4 = load <4 x i32>, ptr %1, align 16
-!CHECK:  %5 = load <4 x i32>, ptr %2, align 16
-!CHECK:  %6 = bitcast <4 x i32> %4 to <16 x i8>
-!CHECK:  %7 = bitcast <4 x i32> %5 to <16 x i8>
-!CHECK:  %8 = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> %6, <16 x i8> %7)
-!CHECK:  store <256 x i1> %8, ptr %3, align 32
+!LLVMIR: @test_mma_assemble_pair_i4_
+!LLVMIR:  %1 = alloca <4 x i32>, i64 1, align 16
+!LLVMIR:  %2 = alloca <4 x i32>, i64 1, align 16
+!LLVMIR:  %3 = alloca <256 x i1>, i64 1, align 32
+!LLVMIR:  %4 = load <4 x i32>, ptr %1, align 16
+!LLVMIR:  %5 = load <4 x i32>, ptr %2, align 16
+!LLVMIR:  %6 = bitcast <4 x i32> %4 to <16 x i8>
+!LLVMIR:  %7 = bitcast <4 x i32> %5 to <16 x i8>
+!LLVMIR:  %8 = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> %6, <16 x i8> %7)
+!LLVMIR:  store <256 x i1> %8, ptr %3, align 32
 
       subroutine test_mma_assemble_pair_i8()
       use, intrinsic :: mma
@@ -312,16 +312,16 @@ subroutine test_mma_assemble_pair_i8()
       call mma_assemble_pair(vp, vi10, vi11)
       end subroutine test_mma_assemble_pair_i8
 
-!CHECK: @test_mma_assemble_pair_i8_
-!CHECK:  %1 = alloca <2 x i64>, i64 1, align 16
-!CHECK:  %2 = alloca <2 x i64>, i64 1, align 16
-!CHECK:  %3 = alloca <256 x i1>, i64 1, align 32
-!CHECK:  %4 = load <2 x i64>, ptr %1, align 16
-!CHECK:  %5 = load <2 x i64>, ptr %2, align 16
-!CHECK:  %6 = bitcast <2 x i64> %4 to <16 x i8>
-!CHECK:  %7 = bitcast <2 x i64> %5 to <16 x i8>
-!CHECK:  %8 = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> %6, <16 x i8> %7)
-!CHECK:  store <256 x i1> %8, ptr %3, align 32
+!LLVMIR: @test_mma_assemble_pair_i8_
+!LLVMIR:  %1 = alloca <2 x i64>, i64 1, align 16
+!LLVMIR:  %2 = alloca <2 x i64>, i64 1, align 16
+!LLVMIR:  %3 = alloca <256 x i1>, i64 1, align 32
+!LLVMIR:  %4 = load <2 x i64>, ptr %1, align 16
+!LLVMIR:  %5 = load <2 x i64>, ptr %2, align 16
+!LLVMIR:  %6 = bitcast <2 x i64> %4 to <16 x i8>
+!LLVMIR:  %7 = bitcast <2 x i64> %5 to <16 x i8>
+!LLVMIR:  %8 = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> %6, <16 x i8> %7)
+!LLVMIR:  store <256 x i1> %8, ptr %3, align 32
 
       subroutine test_mma_assemble_pair_u1()
       use, intrinsic :: mma
@@ -331,14 +331,14 @@ subroutine test_mma_assemble_pair_u1()
       call mma_assemble_pair(vp, vi10, vi11)
       end subroutine test_mma_assemble_pair_u1
 
-!CHECK: @test_mma_assemble_pair_u1_
-!CHECK:  %1 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <256 x i1>, i64 1, align 32
-!CHECK:  %4 = load <16 x i8>, ptr %1, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %6 = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> %4, <16 x i8> %5)
-!CHECK:  store <256 x i1> %6, ptr %3, align 32
+!LLVMIR: @test_mma_assemble_pair_u1_
+!LLVMIR:  %1 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <256 x i1>, i64 1, align 32
+!LLVMIR:  %4 = load <16 x i8>, ptr %1, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %6 = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> %4, <16 x i8> %5)
+!LLVMIR:  store <256 x i1> %6, ptr %3, align 32
 
       subroutine test_mma_assemble_pair_u2()
       use, intrinsic :: mma
@@ -348,16 +348,16 @@ subroutine test_mma_assemble_pair_u2()
       call mma_assemble_pair(vp, vi10, vi11)
       end subroutine test_mma_assemble_pair_u2
 
-!CHECK: @test_mma_assemble_pair_u2_
-!CHECK:  %1 = alloca <8 x i16>, i64 1, align 16
-!CHECK:  %2 = alloca <8 x i16>, i64 1, align 16
-!CHECK:  %3 = alloca <256 x i1>, i64 1, align 32
-!CHECK:  %4 = load <8 x i16>, ptr %1, align 16
-!CHECK:  %5 = load <8 x i16>, ptr %2, align 16
-!CHECK:  %6 = bitcast <8 x i16> %4 to <16 x i8>
-!CHECK:  %7 = bitcast <8 x i16> %5 to <16 x i8>
-!CHECK:  %8 = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> %6, <16 x i8> %7)
-!CHECK:  store <256 x i1> %8, ptr %3, align 32
+!LLVMIR: @test_mma_assemble_pair_u2_
+!LLVMIR:  %1 = alloca <8 x i16>, i64 1, align 16
+!LLVMIR:  %2 = alloca <8 x i16>, i64 1, align 16
+!LLVMIR:  %3 = alloca <256 x i1>, i64 1, align 32
+!LLVMIR:  %4 = load <8 x i16>, ptr %1, align 16
+!LLVMIR:  %5 = load <8 x i16>, ptr %2, align 16
+!LLVMIR:  %6 = bitcast <8 x i16> %4 to <16 x i8>
+!LLVMIR:  %7 = bitcast <8 x i16> %5 to <16 x i8>
+!LLVMIR:  %8 = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> %6, <16 x i8> %7)
+!LLVMIR:  store <256 x i1> %8, ptr %3, align 32
 
       subroutine test_mma_assemble_pair_u4()
       use, intrinsic :: mma
@@ -367,16 +367,16 @@ subroutine test_mma_assemble_pair_u4()
       call mma_assemble_pair(vp, vi10, vi11)
       end subroutine test_mma_assemble_pair_u4
 
-!CHECK: @test_mma_assemble_pair_u4_
-!CHECK:  %1 = alloca <4 x i32>, i64 1, align 16
-!CHECK:  %2 = alloca <4 x i32>, i64 1, align 16
-!CHECK:  %3 = alloca <256 x i1>, i64 1, align 32
-!CHECK:  %4 = load <4 x i32>, ptr %1, align 16
-!CHECK:  %5 = load <4 x i32>, ptr %2, align 16
-!CHECK:  %6 = bitcast <4 x i32> %4 to <16 x i8>
-!CHECK:  %7 = bitcast <4 x i32> %5 to <16 x i8>
-!CHECK:  %8 = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> %6, <16 x i8> %7)
-!CHECK:  store <256 x i1> %8, ptr %3, align 32
+!LLVMIR: @test_mma_assemble_pair_u4_
+!LLVMIR:  %1 = alloca <4 x i32>, i64 1, align 16
+!LLVMIR:  %2 = alloca <4 x i32>, i64 1, align 16
+!LLVMIR:  %3 = alloca <256 x i1>, i64 1, align 32
+!LLVMIR:  %4 = load <4 x i32>, ptr %1, align 16
+!LLVMIR:  %5 = load <4 x i32>, ptr %2, align 16
+!LLVMIR:  %6 = bitcast <4 x i32> %4 to <16 x i8>
+!LLVMIR:  %7 = bitcast <4 x i32> %5 to <16 x i8>
+!LLVMIR:  %8 = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> %6, <16 x i8> %7)
+!LLVMIR:  store <256 x i1> %8, ptr %3, align 32
 
       subroutine test_mma_assemble_pair_u8()
       use, intrinsic :: mma
@@ -386,16 +386,16 @@ subroutine test_mma_assemble_pair_u8()
       call mma_assemble_pair(vp, vi10, vi11)
       end subroutine test_mma_assemble_pair_u8
 
-!CHECK: @test_mma_assemble_pair_u8_
-!CHECK:  %1 = alloca <2 x i64>, i64 1, align 16
-!CHECK:  %2 = alloca <2 x i64>, i64 1, align 16
-!CHECK:  %3 = alloca <256 x i1>, i64 1, align 32
-!CHECK:  %4 = load <2 x i64>, ptr %1, align 16
-!CHECK:  %5 = load <2 x i64>, ptr %2, align 16
-!CHECK:  %6 = bitcast <2 x i64> %4 to <16 x i8>
-!CHECK:  %7 = bitcast <2 x i64> %5 to <16 x i8>
-!CHECK:  %8 = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> %6, <16 x i8> %7)
-!CHECK:  store <256 x i1> %8, ptr %3, align 32
+!LLVMIR: @test_mma_assemble_pair_u8_
+!LLVMIR:  %1 = alloca <2 x i64>, i64 1, align 16
+!LLVMIR:  %2 = alloca <2 x i64>, i64 1, align 16
+!LLVMIR:  %3 = alloca <256 x i1>, i64 1, align 32
+!LLVMIR:  %4 = load <2 x i64>, ptr %1, align 16
+!LLVMIR:  %5 = load <2 x i64>, ptr %2, align 16
+!LLVMIR:  %6 = bitcast <2 x i64> %4 to <16 x i8>
+!LLVMIR:  %7 = bitcast <2 x i64> %5 to <16 x i8>
+!LLVMIR:  %8 = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> %6, <16 x i8> %7)
+!LLVMIR:  store <256 x i1> %8, ptr %3, align 32
 
       subroutine test_mma_assemble_pair_r4()
       use, intrinsic :: mma
@@ -405,16 +405,16 @@ subroutine test_mma_assemble_pair_r4()
       call mma_assemble_pair(vp, vi10, vi11)
       end subroutine test_mma_assemble_pair_r4
 
-!CHECK: @test_mma_assemble_pair_r4_
-!CHECK:  %1 = alloca <4 x float>, i64 1, align 16
-!CHECK:  %2 = alloca <4 x float>, i64 1, align 16
-!CHECK:  %3 = alloca <256 x i1>, i64 1, align 32
-!CHECK:  %4 = load <4 x float>, ptr %1, align 16
-!CHECK:  %5 = load <4 x float>, ptr %2, align 16
-!CHECK:  %6 = bitcast <4 x float> %4 to <16 x i8>
-!CHECK:  %7 = bitcast <4 x float> %5 to <16 x i8>
-!CHECK:  %8 = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> %6, <16 x i8> %7)
-!CHECK:  store <256 x i1> %8, ptr %3, align 32
+!LLVMIR: @test_mma_assemble_pair_r4_
+!LLVMIR:  %1 = alloca <4 x float>, i64 1, align 16
+!LLVMIR:  %2 = alloca <4 x float>, i64 1, align 16
+!LLVMIR:  %3 = alloca <256 x i1>, i64 1, align 32
+!LLVMIR:  %4 = load <4 x float>, ptr %1, align 16
+!LLVMIR:  %5 = load <4 x float>, ptr %2, align 16
+!LLVMIR:  %6 = bitcast <4 x float> %4 to <16 x i8>
+!LLVMIR:  %7 = bitcast <4 x float> %5 to <16 x i8>
+!LLVMIR:  %8 = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> %6, <16 x i8> %7)
+!LLVMIR:  store <256 x i1> %8, ptr %3, align 32
 
       subroutine test_mma_assemble_pair_r8()
       use, intrinsic :: mma
@@ -424,16 +424,16 @@ subroutine test_mma_assemble_pair_r8()
       call mma_assemble_pair(vp, vi10, vi11)
       end subroutine test_mma_assemble_pair_r8
 
-!CHECK: @test_mma_assemble_pair_r8_
-!CHECK:  %1 = alloca <2 x double>, i64 1, align 16
-!CHECK:  %2 = alloca <2 x double>, i64 1, align 16
-!CHECK:  %3 = alloca <256 x i1>, i64 1, align 32
-!CHECK:  %4 = load <2 x double>, ptr %1, align 16
-!CHECK:  %5 = load <2 x double>, ptr %2, align 16
-!CHECK:  %6 = bitcast <2 x double> %4 to <16 x i8>
-!CHECK:  %7 = bitcast <2 x double> %5 to <16 x i8>
-!CHECK:  %8 = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> %6, <16 x i8> %7)
-!CHECK:  store <256 x i1> %8, ptr %3, align 32
+!LLVMIR: @test_mma_assemble_pair_r8_
+!LLVMIR:  %1 = alloca <2 x double>, i64 1, align 16
+!LLVMIR:  %2 = alloca <2 x double>, i64 1, align 16
+!LLVMIR:  %3 = alloca <256 x i1>, i64 1, align 32
+!LLVMIR:  %4 = load <2 x double>, ptr %1, align 16
+!LLVMIR:  %5 = load <2 x double>, ptr %2, align 16
+!LLVMIR:  %6 = bitcast <2 x double> %4 to <16 x i8>
+!LLVMIR:  %7 = bitcast <2 x double> %5 to <16 x i8>
+!LLVMIR:  %8 = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> %6, <16 x i8> %7)
+!LLVMIR:  store <256 x i1> %8, ptr %3, align 32
 
 ! mma_disassemble_acc
 
@@ -446,17 +446,17 @@ subroutine test_mma_build_acc_i1()
       end subroutine test_mma_build_acc_i1
 
 !CHECK-LABEL: @test_mma_build_acc_i1
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %5 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %6 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %7 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %8 = load <16 x i8>, ptr %4, align 16
-!CHECK:  %9 = load <16 x i8>, ptr %5, align 16
-!CHECK:  %10 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %9, <16 x i8> %8, <16 x i8> %7, <16 x i8> %6)
-!CHECK:  store <512 x i1> %10, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %5 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %6 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %7 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %8 = load <16 x i8>, ptr %4, align 16
+!LLVMIR:  %9 = load <16 x i8>, ptr %5, align 16
+!LLVMIR:  %10 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %9, <16 x i8> %8, <16 x i8> %7, <16 x i8> %6)
+!LLVMIR:  store <512 x i1> %10, ptr %1, align 64
 
       subroutine test_mma_build_acc_i2()
       use, intrinsic :: mma
@@ -467,21 +467,21 @@ subroutine test_mma_build_acc_i2()
       end subroutine test_mma_build_acc_i2
 
 !CHECK-LABEL: @test_mma_build_acc_i2
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <8 x i16>, i64 1, align 16
-!CHECK:  %3 = alloca <8 x i16>, i64 1, align 16
-!CHECK:  %4 = alloca <8 x i16>, i64 1, align 16
-!CHECK:  %5 = alloca <8 x i16>, i64 1, align 16
-!CHECK:  %6 = load <8 x i16>, ptr %2, align 16
-!CHECK:  %7 = load <8 x i16>, ptr %3, align 16
-!CHECK:  %8 = load <8 x i16>, ptr %4, align 16
-!CHECK:  %9 = load <8 x i16>, ptr %5, align 16
-!CHECK:  %10 = bitcast <8 x i16> %9 to <16 x i8>
-!CHECK:  %11 = bitcast <8 x i16> %8 to <16 x i8>
-!CHECK:  %12 = bitcast <8 x i16> %7 to <16 x i8>
-!CHECK:  %13 = bitcast <8 x i16> %6 to <16 x i8>
-!CHECK:  %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13)
-!CHECK:  store <512 x i1> %14, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <8 x i16>, i64 1, align 16
+!LLVMIR:  %3 = alloca <8 x i16>, i64 1, align 16
+!LLVMIR:  %4 = alloca <8 x i16>, i64 1, align 16
+!LLVMIR:  %5 = alloca <8 x i16>, i64 1, align 16
+!LLVMIR:  %6 = load <8 x i16>, ptr %2, align 16
+!LLVMIR:  %7 = load <8 x i16>, ptr %3, align 16
+!LLVMIR:  %8 = load <8 x i16>, ptr %4, align 16
+!LLVMIR:  %9 = load <8 x i16>, ptr %5, align 16
+!LLVMIR:  %10 = bitcast <8 x i16> %9 to <16 x i8>
+!LLVMIR:  %11 = bitcast <8 x i16> %8 to <16 x i8>
+!LLVMIR:  %12 = bitcast <8 x i16> %7 to <16 x i8>
+!LLVMIR:  %13 = bitcast <8 x i16> %6 to <16 x i8>
+!LLVMIR:  %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13)
+!LLVMIR:  store <512 x i1> %14, ptr %1, align 64
 
       subroutine test_mma_build_acc_i4()
       use, intrinsic :: mma
@@ -492,21 +492,21 @@ subroutine test_mma_build_acc_i4()
       end subroutine test_mma_build_acc_i4
 
 !CHECK-LABEL: @test_mma_build_acc_i4
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <4 x i32>, i64 1, align 16
-!CHECK:  %3 = alloca <4 x i32>, i64 1, align 16
-!CHECK:  %4 = alloca <4 x i32>, i64 1, align 16
-!CHECK:  %5 = alloca <4 x i32>, i64 1, align 16
-!CHECK:  %6 = load <4 x i32>, ptr %2, align 16
-!CHECK:  %7 = load <4 x i32>, ptr %3, align 16
-!CHECK:  %8 = load <4 x i32>, ptr %4, align 16
-!CHECK:  %9 = load <4 x i32>, ptr %5, align 16
-!CHECK:  %10 = bitcast <4 x i32> %9 to <16 x i8>
-!CHECK:  %11 = bitcast <4 x i32> %8 to <16 x i8>
-!CHECK:  %12 = bitcast <4 x i32> %7 to <16 x i8>
-!CHECK:  %13 = bitcast <4 x i32> %6 to <16 x i8>
-!CHECK:  %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13)
-!CHECK:  store <512 x i1> %14, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <4 x i32>, i64 1, align 16
+!LLVMIR:  %3 = alloca <4 x i32>, i64 1, align 16
+!LLVMIR:  %4 = alloca <4 x i32>, i64 1, align 16
+!LLVMIR:  %5 = alloca <4 x i32>, i64 1, align 16
+!LLVMIR:  %6 = load <4 x i32>, ptr %2, align 16
+!LLVMIR:  %7 = load <4 x i32>, ptr %3, align 16
+!LLVMIR:  %8 = load <4 x i32>, ptr %4, align 16
+!LLVMIR:  %9 = load <4 x i32>, ptr %5, align 16
+!LLVMIR:  %10 = bitcast <4 x i32> %9 to <16 x i8>
+!LLVMIR:  %11 = bitcast <4 x i32> %8 to <16 x i8>
+!LLVMIR:  %12 = bitcast <4 x i32> %7 to <16 x i8>
+!LLVMIR:  %13 = bitcast <4 x i32> %6 to <16 x i8>
+!LLVMIR:  %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13)
+!LLVMIR:  store <512 x i1> %14, ptr %1, align 64
 
       subroutine test_mma_build_acc_i8()
       use, intrinsic :: mma
@@ -517,21 +517,21 @@ subroutine test_mma_build_acc_i8()
       end subroutine test_mma_build_acc_i8
 
 !CHECK-LABEL: @test_mma_build_acc_i8
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <2 x i64>, i64 1, align 16
-!CHECK:  %3 = alloca <2 x i64>, i64 1, align 16
-!CHECK:  %4 = alloca <2 x i64>, i64 1, align 16
-!CHECK:  %5 = alloca <2 x i64>, i64 1, align 16
-!CHECK:  %6 = load <2 x i64>, ptr %2, align 16
-!CHECK:  %7 = load <2 x i64>, ptr %3, align 16
-!CHECK:  %8 = load <2 x i64>, ptr %4, align 16
-!CHECK:  %9 = load <2 x i64>, ptr %5, align 16
-!CHECK:  %10 = bitcast <2 x i64> %9 to <16 x i8>
-!CHECK:  %11 = bitcast <2 x i64> %8 to <16 x i8>
-!CHECK:  %12 = bitcast <2 x i64> %7 to <16 x i8>
-!CHECK:  %13 = bitcast <2 x i64> %6 to <16 x i8>
-!CHECK:  %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13)
-!CHECK:  store <512 x i1> %14, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <2 x i64>, i64 1, align 16
+!LLVMIR:  %3 = alloca <2 x i64>, i64 1, align 16
+!LLVMIR:  %4 = alloca <2 x i64>, i64 1, align 16
+!LLVMIR:  %5 = alloca <2 x i64>, i64 1, align 16
+!LLVMIR:  %6 = load <2 x i64>, ptr %2, align 16
+!LLVMIR:  %7 = load <2 x i64>, ptr %3, align 16
+!LLVMIR:  %8 = load <2 x i64>, ptr %4, align 16
+!LLVMIR:  %9 = load <2 x i64>, ptr %5, align 16
+!LLVMIR:  %10 = bitcast <2 x i64> %9 to <16 x i8>
+!LLVMIR:  %11 = bitcast <2 x i64> %8 to <16 x i8>
+!LLVMIR:  %12 = bitcast <2 x i64> %7 to <16 x i8>
+!LLVMIR:  %13 = bitcast <2 x i64> %6 to <16 x i8>
+!LLVMIR:  %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13)
+!LLVMIR:  store <512 x i1> %14, ptr %1, align 64
 
       subroutine test_mma_build_acc_u1()
       use, intrinsic :: mma
@@ -542,17 +542,17 @@ subroutine test_mma_build_acc_u1()
       end subroutine test_mma_build_acc_u1
 
 !CHECK-LABEL: @test_mma_build_acc_u1
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %5 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %6 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %7 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %8 = load <16 x i8>, ptr %4, align 16
-!CHECK:  %9 = load <16 x i8>, ptr %5, align 16
-!CHECK:  %10 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %9, <16 x i8> %8, <16 x i8> %7, <16 x i8> %6)
-!CHECK:  store <512 x i1> %10, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %5 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %6 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %7 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %8 = load <16 x i8>, ptr %4, align 16
+!LLVMIR:  %9 = load <16 x i8>, ptr %5, align 16
+!LLVMIR:  %10 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %9, <16 x i8> %8, <16 x i8> %7, <16 x i8> %6)
+!LLVMIR:  store <512 x i1> %10, ptr %1, align 64
 
       subroutine test_mma_build_acc_u2()
       use, intrinsic :: mma
@@ -563,21 +563,21 @@ subroutine test_mma_build_acc_u2()
       end subroutine test_mma_build_acc_u2
 
 !CHECK-LABEL: @test_mma_build_acc_u2
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <8 x i16>, i64 1, align 16
-!CHECK:  %3 = alloca <8 x i16>, i64 1, align 16
-!CHECK:  %4 = alloca <8 x i16>, i64 1, align 16
-!CHECK:  %5 = alloca <8 x i16>, i64 1, align 16
-!CHECK:  %6 = load <8 x i16>, ptr %2, align 16
-!CHECK:  %7 = load <8 x i16>, ptr %3, align 16
-!CHECK:  %8 = load <8 x i16>, ptr %4, align 16
-!CHECK:  %9 = load <8 x i16>, ptr %5, align 16
-!CHECK:  %10 = bitcast <8 x i16> %9 to <16 x i8>
-!CHECK:  %11 = bitcast <8 x i16> %8 to <16 x i8>
-!CHECK:  %12 = bitcast <8 x i16> %7 to <16 x i8>
-!CHECK:  %13 = bitcast <8 x i16> %6 to <16 x i8>
-!CHECK:  %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13)
-!CHECK:  store <512 x i1> %14, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <8 x i16>, i64 1, align 16
+!LLVMIR:  %3 = alloca <8 x i16>, i64 1, align 16
+!LLVMIR:  %4 = alloca <8 x i16>, i64 1, align 16
+!LLVMIR:  %5 = alloca <8 x i16>, i64 1, align 16
+!LLVMIR:  %6 = load <8 x i16>, ptr %2, align 16
+!LLVMIR:  %7 = load <8 x i16>, ptr %3, align 16
+!LLVMIR:  %8 = load <8 x i16>, ptr %4, align 16
+!LLVMIR:  %9 = load <8 x i16>, ptr %5, align 16
+!LLVMIR:  %10 = bitcast <8 x i16> %9 to <16 x i8>
+!LLVMIR:  %11 = bitcast <8 x i16> %8 to <16 x i8>
+!LLVMIR:  %12 = bitcast <8 x i16> %7 to <16 x i8>
+!LLVMIR:  %13 = bitcast <8 x i16> %6 to <16 x i8>
+!LLVMIR:  %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13)
+!LLVMIR:  store <512 x i1> %14, ptr %1, align 64
 
       subroutine test_mma_build_acc_u4()
       use, intrinsic :: mma
@@ -588,21 +588,21 @@ subroutine test_mma_build_acc_u4()
       end subroutine test_mma_build_acc_u4
 
 !CHECK-LABEL: @test_mma_build_acc_u4
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <4 x i32>, i64 1, align 16
-!CHECK:  %3 = alloca <4 x i32>, i64 1, align 16
-!CHECK:  %4 = alloca <4 x i32>, i64 1, align 16
-!CHECK:  %5 = alloca <4 x i32>, i64 1, align 16
-!CHECK:  %6 = load <4 x i32>, ptr %2, align 16
-!CHECK:  %7 = load <4 x i32>, ptr %3, align 16
-!CHECK:  %8 = load <4 x i32>, ptr %4, align 16
-!CHECK:  %9 = load <4 x i32>, ptr %5, align 16
-!CHECK:  %10 = bitcast <4 x i32> %9 to <16 x i8>
-!CHECK:  %11 = bitcast <4 x i32> %8 to <16 x i8>
-!CHECK:  %12 = bitcast <4 x i32> %7 to <16 x i8>
-!CHECK:  %13 = bitcast <4 x i32> %6 to <16 x i8>
-!CHECK:  %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13)
-!CHECK:  store <512 x i1> %14, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <4 x i32>, i64 1, align 16
+!LLVMIR:  %3 = alloca <4 x i32>, i64 1, align 16
+!LLVMIR:  %4 = alloca <4 x i32>, i64 1, align 16
+!LLVMIR:  %5 = alloca <4 x i32>, i64 1, align 16
+!LLVMIR:  %6 = load <4 x i32>, ptr %2, align 16
+!LLVMIR:  %7 = load <4 x i32>, ptr %3, align 16
+!LLVMIR:  %8 = load <4 x i32>, ptr %4, align 16
+!LLVMIR:  %9 = load <4 x i32>, ptr %5, align 16
+!LLVMIR:  %10 = bitcast <4 x i32> %9 to <16 x i8>
+!LLVMIR:  %11 = bitcast <4 x i32> %8 to <16 x i8>
+!LLVMIR:  %12 = bitcast <4 x i32> %7 to <16 x i8>
+!LLVMIR:  %13 = bitcast <4 x i32> %6 to <16 x i8>
+!LLVMIR:  %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13)
+!LLVMIR:  store <512 x i1> %14, ptr %1, align 64
 
       subroutine test_mma_build_acc_u8()
       use, intrinsic :: mma
@@ -613,21 +613,21 @@ subroutine test_mma_build_acc_u8()
       end subroutine test_mma_build_acc_u8
 
 !CHECK-LABEL: @test_mma_build_acc_u8
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <2 x i64>, i64 1, align 16
-!CHECK:  %3 = alloca <2 x i64>, i64 1, align 16
-!CHECK:  %4 = alloca <2 x i64>, i64 1, align 16
-!CHECK:  %5 = alloca <2 x i64>, i64 1, align 16
-!CHECK:  %6 = load <2 x i64>, ptr %2, align 16
-!CHECK:  %7 = load <2 x i64>, ptr %3, align 16
-!CHECK:  %8 = load <2 x i64>, ptr %4, align 16
-!CHECK:  %9 = load <2 x i64>, ptr %5, align 16
-!CHECK:  %10 = bitcast <2 x i64> %9 to <16 x i8>
-!CHECK:  %11 = bitcast <2 x i64> %8 to <16 x i8>
-!CHECK:  %12 = bitcast <2 x i64> %7 to <16 x i8>
-!CHECK:  %13 = bitcast <2 x i64> %6 to <16 x i8>
-!CHECK:  %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13)
-!CHECK:  store <512 x i1> %14, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <2 x i64>, i64 1, align 16
+!LLVMIR:  %3 = alloca <2 x i64>, i64 1, align 16
+!LLVMIR:  %4 = alloca <2 x i64>, i64 1, align 16
+!LLVMIR:  %5 = alloca <2 x i64>, i64 1, align 16
+!LLVMIR:  %6 = load <2 x i64>, ptr %2, align 16
+!LLVMIR:  %7 = load <2 x i64>, ptr %3, align 16
+!LLVMIR:  %8 = load <2 x i64>, ptr %4, align 16
+!LLVMIR:  %9 = load <2 x i64>, ptr %5, align 16
+!LLVMIR:  %10 = bitcast <2 x i64> %9 to <16 x i8>
+!LLVMIR:  %11 = bitcast <2 x i64> %8 to <16 x i8>
+!LLVMIR:  %12 = bitcast <2 x i64> %7 to <16 x i8>
+!LLVMIR:  %13 = bitcast <2 x i64> %6 to <16 x i8>
+!LLVMIR:  %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13)
+!LLVMIR:  store <512 x i1> %14, ptr %1, align 64
 
 
       subroutine test_mma_build_acc_r4()
@@ -639,21 +639,21 @@ subroutine test_mma_build_acc_r4()
       end subroutine test_mma_build_acc_r4
 
 !CHECK-LABEL: @test_mma_build_acc_r4
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <4 x float>, i64 1, align 16
-!CHECK:  %3 = alloca <4 x float>, i64 1, align 16
-!CHECK:  %4 = alloca <4 x float>, i64 1, align 16
-!CHECK:  %5 = alloca <4 x float>, i64 1, align 16
-!CHECK:  %6 = load <4 x float>, ptr %2, align 16
-!CHECK:  %7 = load <4 x float>, ptr %3, align 16
-!CHECK:  %8 = load <4 x float>, ptr %4, align 16
-!CHECK:  %9 = load <4 x float>, ptr %5, align 16
-!CHECK:  %10 = bitcast <4 x float> %9 to <16 x i8>
-!CHECK:  %11 = bitcast <4 x float> %8 to <16 x i8>
-!CHECK:  %12 = bitcast <4 x float> %7 to <16 x i8>
-!CHECK:  %13 = bitcast <4 x float> %6 to <16 x i8>
-!CHECK:  %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13)
-!CHECK:  store <512 x i1> %14, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <4 x float>, i64 1, align 16
+!LLVMIR:  %3 = alloca <4 x float>, i64 1, align 16
+!LLVMIR:  %4 = alloca <4 x float>, i64 1, align 16
+!LLVMIR:  %5 = alloca <4 x float>, i64 1, align 16
+!LLVMIR:  %6 = load <4 x float>, ptr %2, align 16
+!LLVMIR:  %7 = load <4 x float>, ptr %3, align 16
+!LLVMIR:  %8 = load <4 x float>, ptr %4, align 16
+!LLVMIR:  %9 = load <4 x float>, ptr %5, align 16
+!LLVMIR:  %10 = bitcast <4 x float> %9 to <16 x i8>
+!LLVMIR:  %11 = bitcast <4 x float> %8 to <16 x i8>
+!LLVMIR:  %12 = bitcast <4 x float> %7 to <16 x i8>
+!LLVMIR:  %13 = bitcast <4 x float> %6 to <16 x i8>
+!LLVMIR:  %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13)
+!LLVMIR:  store <512 x i1> %14, ptr %1, align 64
 
 
       subroutine test_mma_build_acc_r8()
@@ -665,21 +665,21 @@ subroutine test_mma_build_acc_r8()
       end subroutine test_mma_build_acc_r8
 
 !CHECK-LABEL: @test_mma_build_acc_r8
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <2 x double>, i64 1, align 16
-!CHECK:  %3 = alloca <2 x double>, i64 1, align 16
-!CHECK:  %4 = alloca <2 x double>, i64 1, align 16
-!CHECK:  %5 = alloca <2 x double>, i64 1, align 16
-!CHECK:  %6 = load <2 x double>, ptr %2, align 16
-!CHECK:  %7 = load <2 x double>, ptr %3, align 16
-!CHECK:  %8 = load <2 x double>, ptr %4, align 16
-!CHECK:  %9 = load <2 x double>, ptr %5, align 16
-!CHECK:  %10 = bitcast <2 x double> %9 to <16 x i8>
-!CHECK:  %11 = bitcast <2 x double> %8 to <16 x i8>
-!CHECK:  %12 = bitcast <2 x double> %7 to <16 x i8>
-!CHECK:  %13 = bitcast <2 x double> %6 to <16 x i8>
-!CHECK:  %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13)
-!CHECK:  store <512 x i1> %14, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <2 x double>, i64 1, align 16
+!LLVMIR:  %3 = alloca <2 x double>, i64 1, align 16
+!LLVMIR:  %4 = alloca <2 x double>, i64 1, align 16
+!LLVMIR:  %5 = alloca <2 x double>, i64 1, align 16
+!LLVMIR:  %6 = load <2 x double>, ptr %2, align 16
+!LLVMIR:  %7 = load <2 x double>, ptr %3, align 16
+!LLVMIR:  %8 = load <2 x double>, ptr %4, align 16
+!LLVMIR:  %9 = load <2 x double>, ptr %5, align 16
+!LLVMIR:  %10 = bitcast <2 x double> %9 to <16 x i8>
+!LLVMIR:  %11 = bitcast <2 x double> %8 to <16 x i8>
+!LLVMIR:  %12 = bitcast <2 x double> %7 to <16 x i8>
+!LLVMIR:  %13 = bitcast <2 x double> %6 to <16 x i8>
+!LLVMIR:  %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13)
+!LLVMIR:  store <512 x i1> %14, ptr %1, align 64
 
 ! mma_disassemble_acc
 
@@ -692,11 +692,11 @@ subroutine test_disassemble_acc()
       end subroutine
 
 !CHECK-LABEL: @test_disassemble_acc_
-!CHECK:  %1 = alloca float, i64 1, align 4
-!CHECK:  %2 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %3 = load <512 x i1>, ptr %2, align 64
-!CHECK:  %4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> %3)
-!CHECK:  store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %4, ptr %1, align 16
+!LLVMIR:  %1 = alloca float, i64 1, align 4
+!LLVMIR:  %2 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %3 = load <512 x i1>, ptr %2, align 64
+!LLVMIR:  %4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> %3)
+!LLVMIR:  store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %4, ptr %1, align 16
 
 ! mma_disassemble_pair
 
@@ -709,8 +709,8 @@ subroutine test_disassemble_pair()
       end subroutine
 
 !CHECK-LABEL: @test_disassemble_pair_
-!CHECK:  %1 = alloca float, i64 1, align 4
-!CHECK:  %2 = alloca <256 x i1>, i64 1, align 32
-!CHECK:  %3 = load <256 x i1>, ptr %2, align 32
-!CHECK:  %4 = call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %3)
-!CHECK:  store { <16 x i8>, <16 x i8> } %4, ptr %1, align 16
+!LLVMIR:  %1 = alloca float, i64 1, align 4
+!LLVMIR:  %2 = alloca <256 x i1>, i64 1, align 32
+!LLVMIR:  %3 = load <256 x i1>, ptr %2, align 32
+!LLVMIR:  %4 = call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %3)
+!LLVMIR:  store { <16 x i8>, <16 x i8> } %4, ptr %1, align 16

diff  --git a/flang/test/Lower/PowerPC/ppc-mma-outer-product-1.f90 b/flang/test/Lower/PowerPC/ppc-mma-outer-product-1.f90
index 96c7d65a1817ab8..97bebc7683c0216 100644
--- a/flang/test/Lower/PowerPC/ppc-mma-outer-product-1.f90
+++ b/flang/test/Lower/PowerPC/ppc-mma-outer-product-1.f90
@@ -1,4 +1,4 @@
-! RUN: %flang_fc1 -triple powerpc64le-unknown-unknown -target-cpu pwr10 -emit-llvm %s -o - | FileCheck --check-prefixes="CHECK" %s
+! RUN: %flang_fc1 -flang-experimental-hlfir -triple powerpc64le-unknown-unknown -target-cpu pwr10 -emit-llvm %s -o - | FileCheck --check-prefixes="LLVMIR" %s
 ! REQUIRES: target=powerpc{{.*}}
 
       subroutine test_pmxvbf16ger2_def()
@@ -10,13 +10,13 @@ subroutine test_pmxvbf16ger2_def()
       end subroutine test_pmxvbf16ger2_def
 
 !CHECK-LABEL: @test_pmxvbf16ger2_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2(<16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %6, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2(<16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %6, ptr %1, align 64
 
 
       subroutine test_pmxvbf16ger2_non_def()
@@ -28,13 +28,13 @@ subroutine test_pmxvbf16ger2_non_def()
       end subroutine test_pmxvbf16ger2_non_def
 
 !CHECK-LABEL: @test_pmxvbf16ger2_non_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2(<16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %6, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2(<16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %6, ptr %1, align 64
 
 
       subroutine test_pmxvbf16ger2nn_def()
@@ -46,14 +46,14 @@ subroutine test_pmxvbf16ger2nn_def()
       end subroutine test_pmxvbf16ger2nn_def
 
 !CHECK-LABEL: @test_pmxvbf16ger2nn_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2nn(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %7, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2nn(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %7, ptr %1, align 64
 
       subroutine test_pmxvbf16ger2nn_non_def()
       use, intrinsic :: mma
@@ -64,14 +64,14 @@ subroutine test_pmxvbf16ger2nn_non_def()
       end subroutine test_pmxvbf16ger2nn_non_def
 
 !CHECK-LABEL: @test_pmxvbf16ger2nn_non_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2nn(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %7, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2nn(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %7, ptr %1, align 64
 
       subroutine test_pmxvbf16ger2np_def()
       use, intrinsic :: mma
@@ -82,14 +82,14 @@ subroutine test_pmxvbf16ger2np_def()
       end subroutine test_pmxvbf16ger2np_def
 
 !CHECK-LABEL: @test_pmxvbf16ger2np_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2np(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %7, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2np(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %7, ptr %1, align 64
 
       subroutine test_pmxvbf16ger2np_non_def()
       use, intrinsic :: mma
@@ -100,14 +100,14 @@ subroutine test_pmxvbf16ger2np_non_def()
       end subroutine test_pmxvbf16ger2np_non_def
 
 !CHECK-LABEL: @test_pmxvbf16ger2np_non_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2np(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %7, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2np(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %7, ptr %1, align 64
 
       subroutine test_pmxvbf16ger2pn_def()
       use, intrinsic :: mma
@@ -118,14 +118,14 @@ subroutine test_pmxvbf16ger2pn_def()
       end subroutine test_pmxvbf16ger2pn_def
 
 !CHECK-LABEL: @test_pmxvbf16ger2pn_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2pn(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %7, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2pn(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %7, ptr %1, align 64
 
       subroutine test_pmxvbf16ger2pn_non_def()
       use, intrinsic :: mma
@@ -136,14 +136,14 @@ subroutine test_pmxvbf16ger2pn_non_def()
       end subroutine test_pmxvbf16ger2pn_non_def
 
 !CHECK-LABEL: @test_pmxvbf16ger2pn_non_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2pn(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %7, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2pn(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %7, ptr %1, align 64
 
       subroutine test_pmxvbf16ger2pp_def()
       use, intrinsic :: mma
@@ -154,14 +154,14 @@ subroutine test_pmxvbf16ger2pp_def()
       end subroutine test_pmxvbf16ger2pp_def
 
 !CHECK-LABEL: @test_pmxvbf16ger2pp_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2pp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %7, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2pp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %7, ptr %1, align 64
 
       subroutine test_pmxvbf16ger2pp_non_def()
       use, intrinsic :: mma
@@ -172,14 +172,14 @@ subroutine test_pmxvbf16ger2pp_non_def()
       end subroutine test_pmxvbf16ger2pp_non_def
 
 !CHECK-LABEL: @test_pmxvbf16ger2pp_non_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2pp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %7, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2pp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %7, ptr %1, align 64
 
       subroutine test_pmxvf16ger2_def()
       use, intrinsic :: mma
@@ -190,13 +190,13 @@ subroutine test_pmxvf16ger2_def()
       end subroutine test_pmxvf16ger2_def
 
 !CHECK-LABEL: @test_pmxvf16ger2_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = call <512 x i1> @llvm.ppc.mma.pmxvf16ger2(<16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %6, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = call <512 x i1> @llvm.ppc.mma.pmxvf16ger2(<16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %6, ptr %1, align 64
 
       subroutine test_pmxvf16ger2_non_def()
       use, intrinsic :: mma
@@ -207,13 +207,13 @@ subroutine test_pmxvf16ger2_non_def()
       end subroutine test_pmxvf16ger2_non_def
 
 !CHECK-LABEL: @test_pmxvf16ger2_non_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = call <512 x i1> @llvm.ppc.mma.pmxvf16ger2(<16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %6, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = call <512 x i1> @llvm.ppc.mma.pmxvf16ger2(<16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %6, ptr %1, align 64
 
       subroutine test_pmxvf16ger2nn_def()
       use, intrinsic :: mma
@@ -224,14 +224,14 @@ subroutine test_pmxvf16ger2nn_def()
       end subroutine test_pmxvf16ger2nn_def
 
 !CHECK-LABEL: @test_pmxvf16ger2nn_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvf16ger2nn(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %7, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvf16ger2nn(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %7, ptr %1, align 64
 
       subroutine test_pmxvf16ger2nn_non_def()
       use, intrinsic :: mma
@@ -242,14 +242,14 @@ subroutine test_pmxvf16ger2nn_non_def()
       end subroutine test_pmxvf16ger2nn_non_def
 
 !CHECK-LABEL: @test_pmxvf16ger2nn_non_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvf16ger2nn(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %7, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvf16ger2nn(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %7, ptr %1, align 64
 
       subroutine test_pmxvf16ger2np_def()
       use, intrinsic :: mma
@@ -260,14 +260,14 @@ subroutine test_pmxvf16ger2np_def()
       end subroutine test_pmxvf16ger2np_def
 
 !CHECK-LABEL: @test_pmxvf16ger2np_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvf16ger2np(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %7, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvf16ger2np(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %7, ptr %1, align 64
 
       subroutine test_pmxvf16ger2np_non_def()
       use, intrinsic :: mma
@@ -278,14 +278,14 @@ subroutine test_pmxvf16ger2np_non_def()
       end subroutine test_pmxvf16ger2np_non_def
 
 !CHECK-LABEL: @test_pmxvf16ger2np_non_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvf16ger2np(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %7, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvf16ger2np(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %7, ptr %1, align 64
 
       subroutine test_pmxvf16ger2pn_def()
       use, intrinsic :: mma
@@ -296,14 +296,14 @@ subroutine test_pmxvf16ger2pn_def()
       end subroutine test_pmxvf16ger2pn_def
 
 !CHECK-LABEL: @test_pmxvf16ger2pn_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvf16ger2pn(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %7, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvf16ger2pn(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %7, ptr %1, align 64
 
       subroutine test_pmxvf16ger2pn_non_def()
       use, intrinsic :: mma
@@ -314,14 +314,14 @@ subroutine test_pmxvf16ger2pn_non_def()
       end subroutine test_pmxvf16ger2pn_non_def
 
 !CHECK-LABEL: @test_pmxvf16ger2pn_non_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvf16ger2pn(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %7, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvf16ger2pn(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %7, ptr %1, align 64
 
       subroutine test_pmxvf16ger2pp_def()
       use, intrinsic :: mma
@@ -332,14 +332,14 @@ subroutine test_pmxvf16ger2pp_def()
       end subroutine test_pmxvf16ger2pp_def
 
 !CHECK-LABEL: @test_pmxvf16ger2pp_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvf16ger2pp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %7, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvf16ger2pp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %7, ptr %1, align 64
 
       subroutine test_pmxvf16ger2pp_non_def()
       use, intrinsic :: mma
@@ -350,14 +350,14 @@ subroutine test_pmxvf16ger2pp_non_def()
       end subroutine test_pmxvf16ger2pp_non_def
 
 !CHECK-LABEL: @test_pmxvf16ger2pp_non_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvf16ger2pp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %7, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvf16ger2pp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %7, ptr %1, align 64
 
       subroutine test_pmxvf32ger_u1_def()
       use, intrinsic :: mma
@@ -368,13 +368,13 @@ subroutine test_pmxvf32ger_u1_def()
       end subroutine test_pmxvf32ger_u1_def
 
 !CHECK-LABEL: @test_pmxvf32ger_u1_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = call <512 x i1> @llvm.ppc.mma.pmxvf32ger(<16 x i8> %4, <16 x i8> %5, i32 7, i32 2)
-!CHECK:  store <512 x i1> %6, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = call <512 x i1> @llvm.ppc.mma.pmxvf32ger(<16 x i8> %4, <16 x i8> %5, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %6, ptr %1, align 64
 
       subroutine test_pmxvf32ger_u1_non_def()
       use, intrinsic :: mma
@@ -385,13 +385,13 @@ subroutine test_pmxvf32ger_u1_non_def()
       end subroutine test_pmxvf32ger_u1_non_def
 
 !CHECK-LABEL: @test_pmxvf32ger_u1_non_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = call <512 x i1> @llvm.ppc.mma.pmxvf32ger(<16 x i8> %4, <16 x i8> %5, i32 7, i32 2)
-!CHECK:  store <512 x i1> %6, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = call <512 x i1> @llvm.ppc.mma.pmxvf32ger(<16 x i8> %4, <16 x i8> %5, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %6, ptr %1, align 64
 
       subroutine test_pmxvf32ger_r4_def()
       use, intrinsic :: mma
@@ -402,15 +402,15 @@ subroutine test_pmxvf32ger_r4_def()
       end subroutine test_pmxvf32ger_r4_def
 
 !CHECK-LABEL: @test_pmxvf32ger_r4_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <4 x float>, i64 1, align 16
-!CHECK:  %3 = alloca <4 x float>, i64 1, align 16
-!CHECK:  %4 = load <4 x float>, ptr %2, align 16
-!CHECK:  %5 = load <4 x float>, ptr %3, align 16
-!CHECK:  %6 = bitcast <4 x float> %4 to <16 x i8>
-!CHECK:  %7 = bitcast <4 x float> %5 to <16 x i8>
-!CHECK:  %8 = call <512 x i1> @llvm.ppc.mma.pmxvf32ger(<16 x i8> %6, <16 x i8> %7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %8, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <4 x float>, i64 1, align 16
+!LLVMIR:  %3 = alloca <4 x float>, i64 1, align 16
+!LLVMIR:  %4 = load <4 x float>, ptr %2, align 16
+!LLVMIR:  %5 = load <4 x float>, ptr %3, align 16
+!LLVMIR:  %6 = bitcast <4 x float> %4 to <16 x i8>
+!LLVMIR:  %7 = bitcast <4 x float> %5 to <16 x i8>
+!LLVMIR:  %8 = call <512 x i1> @llvm.ppc.mma.pmxvf32ger(<16 x i8> %6, <16 x i8> %7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %8, ptr %1, align 64
 
       subroutine test_pmxvf32ger_r4_non_def()
       use, intrinsic :: mma
@@ -421,15 +421,15 @@ subroutine test_pmxvf32ger_r4_non_def()
       end subroutine test_pmxvf32ger_r4_non_def
 
 !CHECK-LABEL: @test_pmxvf32ger_r4_non_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <4 x float>, i64 1, align 16
-!CHECK:  %3 = alloca <4 x float>, i64 1, align 16
-!CHECK:  %4 = load <4 x float>, ptr %2, align 16
-!CHECK:  %5 = load <4 x float>, ptr %3, align 16
-!CHECK:  %6 = bitcast <4 x float> %4 to <16 x i8>
-!CHECK:  %7 = bitcast <4 x float> %5 to <16 x i8>
-!CHECK:  %8 = call <512 x i1> @llvm.ppc.mma.pmxvf32ger(<16 x i8> %6, <16 x i8> %7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %8, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <4 x float>, i64 1, align 16
+!LLVMIR:  %3 = alloca <4 x float>, i64 1, align 16
+!LLVMIR:  %4 = load <4 x float>, ptr %2, align 16
+!LLVMIR:  %5 = load <4 x float>, ptr %3, align 16
+!LLVMIR:  %6 = bitcast <4 x float> %4 to <16 x i8>
+!LLVMIR:  %7 = bitcast <4 x float> %5 to <16 x i8>
+!LLVMIR:  %8 = call <512 x i1> @llvm.ppc.mma.pmxvf32ger(<16 x i8> %6, <16 x i8> %7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %8, ptr %1, align 64
 
       subroutine test_pmxvf32gernn_u1_def()
       use, intrinsic :: mma
@@ -440,14 +440,14 @@ subroutine test_pmxvf32gernn_u1_def()
       end subroutine test_pmxvf32gernn_u1_def
 
 !CHECK-LABEL: @test_pmxvf32gernn_u1_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvf32gernn(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 2)
-!CHECK:  store <512 x i1> %7, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvf32gernn(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %7, ptr %1, align 64
 
       subroutine test_pmxvf32gernn_u1_non_def()
       use, intrinsic :: mma
@@ -458,14 +458,14 @@ subroutine test_pmxvf32gernn_u1_non_def()
       end subroutine test_pmxvf32gernn_u1_non_def
 
 !CHECK-LABEL: @test_pmxvf32gernn_u1_non_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvf32gernn(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 2)
-!CHECK:  store <512 x i1> %7, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvf32gernn(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %7, ptr %1, align 64
 
       subroutine test_pmxvf32gernn_r4_def()
       use, intrinsic :: mma
@@ -476,16 +476,16 @@ subroutine test_pmxvf32gernn_r4_def()
       end subroutine test_pmxvf32gernn_r4_def
 
 !CHECK-LABEL: @test_pmxvf32gernn_r4_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <4 x float>, i64 1, align 16
-!CHECK:  %3 = alloca <4 x float>, i64 1, align 16
-!CHECK:  %4 = load <4 x float>, ptr %2, align 16
-!CHECK:  %5 = load <4 x float>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = bitcast <4 x float> %4 to <16 x i8>
-!CHECK:  %8 = bitcast <4 x float> %5 to <16 x i8>
-!CHECK:  %9 = call <512 x i1> @llvm.ppc.mma.pmxvf32gernn(<512 x i1> %6, <16 x i8> %7, <16 x i8> %8, i32 7, i32 2)
-!CHECK:  store <512 x i1> %9, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <4 x float>, i64 1, align 16
+!LLVMIR:  %3 = alloca <4 x float>, i64 1, align 16
+!LLVMIR:  %4 = load <4 x float>, ptr %2, align 16
+!LLVMIR:  %5 = load <4 x float>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = bitcast <4 x float> %4 to <16 x i8>
+!LLVMIR:  %8 = bitcast <4 x float> %5 to <16 x i8>
+!LLVMIR:  %9 = call <512 x i1> @llvm.ppc.mma.pmxvf32gernn(<512 x i1> %6, <16 x i8> %7, <16 x i8> %8, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %9, ptr %1, align 64
 
       subroutine test_pmxvf32gernn_r4_non_def()
       use, intrinsic :: mma
@@ -496,16 +496,16 @@ subroutine test_pmxvf32gernn_r4_non_def()
       end subroutine test_pmxvf32gernn_r4_non_def
 
 !CHECK-LABEL: @test_pmxvf32gernn_r4_non_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <4 x float>, i64 1, align 16
-!CHECK:  %3 = alloca <4 x float>, i64 1, align 16
-!CHECK:  %4 = load <4 x float>, ptr %2, align 16
-!CHECK:  %5 = load <4 x float>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = bitcast <4 x float> %4 to <16 x i8>
-!CHECK:  %8 = bitcast <4 x float> %5 to <16 x i8>
-!CHECK:  %9 = call <512 x i1> @llvm.ppc.mma.pmxvf32gernn(<512 x i1> %6, <16 x i8> %7, <16 x i8> %8, i32 7, i32 2)
-!CHECK:  store <512 x i1> %9, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <4 x float>, i64 1, align 16
+!LLVMIR:  %3 = alloca <4 x float>, i64 1, align 16
+!LLVMIR:  %4 = load <4 x float>, ptr %2, align 16
+!LLVMIR:  %5 = load <4 x float>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = bitcast <4 x float> %4 to <16 x i8>
+!LLVMIR:  %8 = bitcast <4 x float> %5 to <16 x i8>
+!LLVMIR:  %9 = call <512 x i1> @llvm.ppc.mma.pmxvf32gernn(<512 x i1> %6, <16 x i8> %7, <16 x i8> %8, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %9, ptr %1, align 64
 
       subroutine test_pmxvf32gernp_u1_def()
       use, intrinsic :: mma
@@ -516,14 +516,14 @@ subroutine test_pmxvf32gernp_u1_def()
       end subroutine test_pmxvf32gernp_u1_def
 
 !CHECK-LABEL: @test_pmxvf32gernp_u1_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvf32gernp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 2)
-!CHECK:  store <512 x i1> %7, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvf32gernp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %7, ptr %1, align 64
 
       subroutine test_pmxvf32gernp_u1_non_def()
       use, intrinsic :: mma
@@ -534,14 +534,14 @@ subroutine test_pmxvf32gernp_u1_non_def()
       end subroutine test_pmxvf32gernp_u1_non_def
 
 !CHECK-LABEL: @test_pmxvf32gernp_u1_non_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvf32gernp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 2)
-!CHECK:  store <512 x i1> %7, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvf32gernp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %7, ptr %1, align 64
 
       subroutine test_pmxvf32gernp_r4_def()
       use, intrinsic :: mma
@@ -552,16 +552,16 @@ subroutine test_pmxvf32gernp_r4_def()
       end subroutine test_pmxvf32gernp_r4_def
 
 !CHECK-LABEL: @test_pmxvf32gernp_r4_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <4 x float>, i64 1, align 16
-!CHECK:  %3 = alloca <4 x float>, i64 1, align 16
-!CHECK:  %4 = load <4 x float>, ptr %2, align 16
-!CHECK:  %5 = load <4 x float>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = bitcast <4 x float> %4 to <16 x i8>
-!CHECK:  %8 = bitcast <4 x float> %5 to <16 x i8>
-!CHECK:  %9 = call <512 x i1> @llvm.ppc.mma.pmxvf32gernp(<512 x i1> %6, <16 x i8> %7, <16 x i8> %8, i32 7, i32 2)
-!CHECK:  store <512 x i1> %9, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <4 x float>, i64 1, align 16
+!LLVMIR:  %3 = alloca <4 x float>, i64 1, align 16
+!LLVMIR:  %4 = load <4 x float>, ptr %2, align 16
+!LLVMIR:  %5 = load <4 x float>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = bitcast <4 x float> %4 to <16 x i8>
+!LLVMIR:  %8 = bitcast <4 x float> %5 to <16 x i8>
+!LLVMIR:  %9 = call <512 x i1> @llvm.ppc.mma.pmxvf32gernp(<512 x i1> %6, <16 x i8> %7, <16 x i8> %8, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %9, ptr %1, align 64
 
       subroutine test_pmxvf32gernp_r4_non_def()
       use, intrinsic :: mma
@@ -572,16 +572,16 @@ subroutine test_pmxvf32gernp_r4_non_def()
       end subroutine test_pmxvf32gernp_r4_non_def
 
 !CHECK-LABEL: @test_pmxvf32gernp_r4_non_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <4 x float>, i64 1, align 16
-!CHECK:  %3 = alloca <4 x float>, i64 1, align 16
-!CHECK:  %4 = load <4 x float>, ptr %2, align 16
-!CHECK:  %5 = load <4 x float>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = bitcast <4 x float> %4 to <16 x i8>
-!CHECK:  %8 = bitcast <4 x float> %5 to <16 x i8>
-!CHECK:  %9 = call <512 x i1> @llvm.ppc.mma.pmxvf32gernp(<512 x i1> %6, <16 x i8> %7, <16 x i8> %8, i32 7, i32 2)
-!CHECK:  store <512 x i1> %9, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <4 x float>, i64 1, align 16
+!LLVMIR:  %3 = alloca <4 x float>, i64 1, align 16
+!LLVMIR:  %4 = load <4 x float>, ptr %2, align 16
+!LLVMIR:  %5 = load <4 x float>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = bitcast <4 x float> %4 to <16 x i8>
+!LLVMIR:  %8 = bitcast <4 x float> %5 to <16 x i8>
+!LLVMIR:  %9 = call <512 x i1> @llvm.ppc.mma.pmxvf32gernp(<512 x i1> %6, <16 x i8> %7, <16 x i8> %8, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %9, ptr %1, align 64
 
       subroutine test_pmxvf32gerpn_u1_def()
       use, intrinsic :: mma
@@ -592,14 +592,14 @@ subroutine test_pmxvf32gerpn_u1_def()
       end subroutine test_pmxvf32gerpn_u1_def
 
 !CHECK-LABEL: @test_pmxvf32gerpn_u1_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvf32gerpn(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 2)
-!CHECK:  store <512 x i1> %7, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvf32gerpn(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %7, ptr %1, align 64
 
       subroutine test_pmxvf32gerpn_u1_non_def()
       use, intrinsic :: mma
@@ -610,14 +610,14 @@ subroutine test_pmxvf32gerpn_u1_non_def()
       end subroutine test_pmxvf32gerpn_u1_non_def
 
 !CHECK-LABEL: @test_pmxvf32gerpn_u1_non_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvf32gerpn(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 2)
-!CHECK:  store <512 x i1> %7, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvf32gerpn(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %7, ptr %1, align 64
 
       subroutine test_pmxvf32gerpn_r4_def()
       use, intrinsic :: mma
@@ -628,16 +628,16 @@ subroutine test_pmxvf32gerpn_r4_def()
       end subroutine test_pmxvf32gerpn_r4_def
 
 !CHECK-LABEL: @test_pmxvf32gerpn_r4_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <4 x float>, i64 1, align 16
-!CHECK:  %3 = alloca <4 x float>, i64 1, align 16
-!CHECK:  %4 = load <4 x float>, ptr %2, align 16
-!CHECK:  %5 = load <4 x float>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = bitcast <4 x float> %4 to <16 x i8>
-!CHECK:  %8 = bitcast <4 x float> %5 to <16 x i8>
-!CHECK:  %9 = call <512 x i1> @llvm.ppc.mma.pmxvf32gerpn(<512 x i1> %6, <16 x i8> %7, <16 x i8> %8, i32 7, i32 2)
-!CHECK:  store <512 x i1> %9, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <4 x float>, i64 1, align 16
+!LLVMIR:  %3 = alloca <4 x float>, i64 1, align 16
+!LLVMIR:  %4 = load <4 x float>, ptr %2, align 16
+!LLVMIR:  %5 = load <4 x float>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = bitcast <4 x float> %4 to <16 x i8>
+!LLVMIR:  %8 = bitcast <4 x float> %5 to <16 x i8>
+!LLVMIR:  %9 = call <512 x i1> @llvm.ppc.mma.pmxvf32gerpn(<512 x i1> %6, <16 x i8> %7, <16 x i8> %8, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %9, ptr %1, align 64
 
       subroutine test_pmxvf32gerpn_r4_non_def()
       use, intrinsic :: mma
@@ -648,16 +648,16 @@ subroutine test_pmxvf32gerpn_r4_non_def()
       end subroutine test_pmxvf32gerpn_r4_non_def
 
 !CHECK-LABEL: @test_pmxvf32gerpn_r4_non_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <4 x float>, i64 1, align 16
-!CHECK:  %3 = alloca <4 x float>, i64 1, align 16
-!CHECK:  %4 = load <4 x float>, ptr %2, align 16
-!CHECK:  %5 = load <4 x float>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = bitcast <4 x float> %4 to <16 x i8>
-!CHECK:  %8 = bitcast <4 x float> %5 to <16 x i8>
-!CHECK:  %9 = call <512 x i1> @llvm.ppc.mma.pmxvf32gerpn(<512 x i1> %6, <16 x i8> %7, <16 x i8> %8, i32 7, i32 2)
-!CHECK:  store <512 x i1> %9, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <4 x float>, i64 1, align 16
+!LLVMIR:  %3 = alloca <4 x float>, i64 1, align 16
+!LLVMIR:  %4 = load <4 x float>, ptr %2, align 16
+!LLVMIR:  %5 = load <4 x float>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = bitcast <4 x float> %4 to <16 x i8>
+!LLVMIR:  %8 = bitcast <4 x float> %5 to <16 x i8>
+!LLVMIR:  %9 = call <512 x i1> @llvm.ppc.mma.pmxvf32gerpn(<512 x i1> %6, <16 x i8> %7, <16 x i8> %8, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %9, ptr %1, align 64
 
       subroutine test_pmxvf32gerpp_u1_def()
       use, intrinsic :: mma
@@ -668,14 +668,14 @@ subroutine test_pmxvf32gerpp_u1_def()
       end subroutine test_pmxvf32gerpp_u1_def
 
 !CHECK-LABEL: @test_pmxvf32gerpp_u1_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvf32gerpp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 2)
-!CHECK:  store <512 x i1> %7, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvf32gerpp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %7, ptr %1, align 64
 
       subroutine test_pmxvf32gerpp_u1_non_def()
       use, intrinsic :: mma
@@ -686,14 +686,14 @@ subroutine test_pmxvf32gerpp_u1_non_def()
       end subroutine test_pmxvf32gerpp_u1_non_def
 
 !CHECK-LABEL: @test_pmxvf32gerpp_u1_non_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvf32gerpp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 2)
-!CHECK:  store <512 x i1> %7, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvf32gerpp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %7, ptr %1, align 64
 
       subroutine test_pmxvf32gerpp_r4_def()
       use, intrinsic :: mma
@@ -704,16 +704,16 @@ subroutine test_pmxvf32gerpp_r4_def()
       end subroutine test_pmxvf32gerpp_r4_def
 
 !CHECK-LABEL: @test_pmxvf32gerpp_r4_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <4 x float>, i64 1, align 16
-!CHECK:  %3 = alloca <4 x float>, i64 1, align 16
-!CHECK:  %4 = load <4 x float>, ptr %2, align 16
-!CHECK:  %5 = load <4 x float>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = bitcast <4 x float> %4 to <16 x i8>
-!CHECK:  %8 = bitcast <4 x float> %5 to <16 x i8>
-!CHECK:  %9 = call <512 x i1> @llvm.ppc.mma.pmxvf32gerpp(<512 x i1> %6, <16 x i8> %7, <16 x i8> %8, i32 7, i32 2)
-!CHECK:  store <512 x i1> %9, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <4 x float>, i64 1, align 16
+!LLVMIR:  %3 = alloca <4 x float>, i64 1, align 16
+!LLVMIR:  %4 = load <4 x float>, ptr %2, align 16
+!LLVMIR:  %5 = load <4 x float>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = bitcast <4 x float> %4 to <16 x i8>
+!LLVMIR:  %8 = bitcast <4 x float> %5 to <16 x i8>
+!LLVMIR:  %9 = call <512 x i1> @llvm.ppc.mma.pmxvf32gerpp(<512 x i1> %6, <16 x i8> %7, <16 x i8> %8, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %9, ptr %1, align 64
 
       subroutine test_pmxvf32gerpp_r4_non_def()
       use, intrinsic :: mma
@@ -724,16 +724,16 @@ subroutine test_pmxvf32gerpp_r4_non_def()
       end subroutine test_pmxvf32gerpp_r4_non_def
 
 !CHECK-LABEL: @test_pmxvf32gerpp_r4_non_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <4 x float>, i64 1, align 16
-!CHECK:  %3 = alloca <4 x float>, i64 1, align 16
-!CHECK:  %4 = load <4 x float>, ptr %2, align 16
-!CHECK:  %5 = load <4 x float>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = bitcast <4 x float> %4 to <16 x i8>
-!CHECK:  %8 = bitcast <4 x float> %5 to <16 x i8>
-!CHECK:  %9 = call <512 x i1> @llvm.ppc.mma.pmxvf32gerpp(<512 x i1> %6, <16 x i8> %7, <16 x i8> %8, i32 7, i32 2)
-!CHECK:  store <512 x i1> %9, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <4 x float>, i64 1, align 16
+!LLVMIR:  %3 = alloca <4 x float>, i64 1, align 16
+!LLVMIR:  %4 = load <4 x float>, ptr %2, align 16
+!LLVMIR:  %5 = load <4 x float>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = bitcast <4 x float> %4 to <16 x i8>
+!LLVMIR:  %8 = bitcast <4 x float> %5 to <16 x i8>
+!LLVMIR:  %9 = call <512 x i1> @llvm.ppc.mma.pmxvf32gerpp(<512 x i1> %6, <16 x i8> %7, <16 x i8> %8, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %9, ptr %1, align 64
 
       subroutine test_pmxvf64ger_u1_def()
       use, intrinsic :: mma
@@ -745,13 +745,13 @@ subroutine test_pmxvf64ger_u1_def()
       end subroutine test_pmxvf64ger_u1_def
 
 !CHECK-LABEL: @test_pmxvf64ger_u1_def_
-!CHECK:  %1 = alloca <256 x i1>, i64 1, align 32
-!CHECK:  %2 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <256 x i1>, ptr %1, align 32
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = call <512 x i1> @llvm.ppc.mma.pmxvf64ger(<256 x i1> %4, <16 x i8> %5, i32 7, i32 2)
-!CHECK:  store <512 x i1> %6, ptr %2, align 64
+!LLVMIR:  %1 = alloca <256 x i1>, i64 1, align 32
+!LLVMIR:  %2 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <256 x i1>, ptr %1, align 32
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = call <512 x i1> @llvm.ppc.mma.pmxvf64ger(<256 x i1> %4, <16 x i8> %5, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %6, ptr %2, align 64
 
       subroutine test_pmxvf64ger_u1_non_def()
       use, intrinsic :: mma
@@ -763,13 +763,13 @@ subroutine test_pmxvf64ger_u1_non_def()
       end subroutine test_pmxvf64ger_u1_non_def
 
 !CHECK-LABEL: @test_pmxvf64ger_u1_non_def_
-!CHECK:  %1 = alloca <256 x i1>, i64 1, align 32
-!CHECK:  %2 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <256 x i1>, ptr %1, align 32
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = call <512 x i1> @llvm.ppc.mma.pmxvf64ger(<256 x i1> %4, <16 x i8> %5, i32 7, i32 2)
-!CHECK:  store <512 x i1> %6, ptr %2, align 64
+!LLVMIR:  %1 = alloca <256 x i1>, i64 1, align 32
+!LLVMIR:  %2 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <256 x i1>, ptr %1, align 32
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = call <512 x i1> @llvm.ppc.mma.pmxvf64ger(<256 x i1> %4, <16 x i8> %5, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %6, ptr %2, align 64
 
       subroutine test_pmxvf64ger_r8_def()
       use, intrinsic :: mma
@@ -781,14 +781,14 @@ subroutine test_pmxvf64ger_r8_def()
       end subroutine test_pmxvf64ger_r8_def
 
 !CHECK-LABEL: @test_pmxvf64ger_r8_def_
-!CHECK:  %1 = alloca <256 x i1>, i64 1, align 32
-!CHECK:  %2 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %3 = alloca <2 x double>, i64 1, align 16
-!CHECK:  %4 = load <256 x i1>, ptr %1, align 32
-!CHECK:  %5 = load <2 x double>, ptr %3, align 16
-!CHECK:  %6 = bitcast <2 x double> %5 to <16 x i8>
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvf64ger(<256 x i1> %4, <16 x i8> %6, i32 7, i32 2)
-!CHECK:  store <512 x i1> %7, ptr %2, align 64
+!LLVMIR:  %1 = alloca <256 x i1>, i64 1, align 32
+!LLVMIR:  %2 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %3 = alloca <2 x double>, i64 1, align 16
+!LLVMIR:  %4 = load <256 x i1>, ptr %1, align 32
+!LLVMIR:  %5 = load <2 x double>, ptr %3, align 16
+!LLVMIR:  %6 = bitcast <2 x double> %5 to <16 x i8>
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvf64ger(<256 x i1> %4, <16 x i8> %6, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %7, ptr %2, align 64
 
       subroutine test_pmxvf64ger_r8_non_def()
       use, intrinsic :: mma
@@ -800,14 +800,14 @@ subroutine test_pmxvf64ger_r8_non_def()
       end subroutine test_pmxvf64ger_r8_non_def
 
 !CHECK-LABEL: @test_pmxvf64ger_r8_non_def_
-!CHECK:  %1 = alloca <256 x i1>, i64 1, align 32
-!CHECK:  %2 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %3 = alloca <2 x double>, i64 1, align 16
-!CHECK:  %4 = load <256 x i1>, ptr %1, align 32
-!CHECK:  %5 = load <2 x double>, ptr %3, align 16
-!CHECK:  %6 = bitcast <2 x double> %5 to <16 x i8>
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvf64ger(<256 x i1> %4, <16 x i8> %6, i32 7, i32 2)
-!CHECK:  store <512 x i1> %7, ptr %2, align 64
+!LLVMIR:  %1 = alloca <256 x i1>, i64 1, align 32
+!LLVMIR:  %2 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %3 = alloca <2 x double>, i64 1, align 16
+!LLVMIR:  %4 = load <256 x i1>, ptr %1, align 32
+!LLVMIR:  %5 = load <2 x double>, ptr %3, align 16
+!LLVMIR:  %6 = bitcast <2 x double> %5 to <16 x i8>
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvf64ger(<256 x i1> %4, <16 x i8> %6, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %7, ptr %2, align 64
 
       subroutine test_pmxvf64gernn_u1_def()
       use, intrinsic :: mma
@@ -819,14 +819,14 @@ subroutine test_pmxvf64gernn_u1_def()
       end subroutine test_pmxvf64gernn_u1_def
 
 !CHECK-LABEL: @test_pmxvf64gernn_u1_def_
-!CHECK:  %1 = alloca <256 x i1>, i64 1, align 32
-!CHECK:  %2 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <256 x i1>, ptr %1, align 32
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %2, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> %6, <256 x i1> %4, <16 x i8> %5, i32 7, i32 2)
-!CHECK:  store <512 x i1> %7, ptr %2, align 64
+!LLVMIR:  %1 = alloca <256 x i1>, i64 1, align 32
+!LLVMIR:  %2 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <256 x i1>, ptr %1, align 32
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %2, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> %6, <256 x i1> %4, <16 x i8> %5, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %7, ptr %2, align 64
 
       subroutine test_pmxvf64gernn_u1_non_def()
       use, intrinsic :: mma
@@ -838,14 +838,14 @@ subroutine test_pmxvf64gernn_u1_non_def()
       end subroutine test_pmxvf64gernn_u1_non_def
 
 !CHECK-LABEL: @test_pmxvf64gernn_u1_non_def_
-!CHECK:  %1 = alloca <256 x i1>, i64 1, align 32
-!CHECK:  %2 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <256 x i1>, ptr %1, align 32
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %2, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> %6, <256 x i1> %4, <16 x i8> %5, i32 7, i32 2)
-!CHECK:  store <512 x i1> %7, ptr %2, align 64
+!LLVMIR:  %1 = alloca <256 x i1>, i64 1, align 32
+!LLVMIR:  %2 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <256 x i1>, ptr %1, align 32
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %2, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> %6, <256 x i1> %4, <16 x i8> %5, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %7, ptr %2, align 64
 
       subroutine test_pmxvf64gernn_r8_def()
       use, intrinsic :: mma
@@ -857,15 +857,15 @@ subroutine test_pmxvf64gernn_r8_def()
       end subroutine test_pmxvf64gernn_r8_def
 
 !CHECK-LABEL: @test_pmxvf64gernn_r8_def_
-!CHECK:  %1 = alloca <256 x i1>, i64 1, align 32
-!CHECK:  %2 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %3 = alloca <2 x double>, i64 1, align 16
-!CHECK:  %4 = load <256 x i1>, ptr %1, align 32
-!CHECK:  %5 = load <2 x double>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %2, align 64
-!CHECK:  %7 = bitcast <2 x double> %5 to <16 x i8>
-!CHECK:  %8 = call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> %6, <256 x i1> %4, <16 x i8> %7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %8, ptr %2, align 64
+!LLVMIR:  %1 = alloca <256 x i1>, i64 1, align 32
+!LLVMIR:  %2 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %3 = alloca <2 x double>, i64 1, align 16
+!LLVMIR:  %4 = load <256 x i1>, ptr %1, align 32
+!LLVMIR:  %5 = load <2 x double>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %2, align 64
+!LLVMIR:  %7 = bitcast <2 x double> %5 to <16 x i8>
+!LLVMIR:  %8 = call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> %6, <256 x i1> %4, <16 x i8> %7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %8, ptr %2, align 64
 
       subroutine test_pmxvf64gernn_r8_non_def()
       use, intrinsic :: mma
@@ -877,15 +877,15 @@ subroutine test_pmxvf64gernn_r8_non_def()
       end subroutine test_pmxvf64gernn_r8_non_def
 
 !CHECK-LABEL: @test_pmxvf64gernn_r8_non_def_
-!CHECK:  %1 = alloca <256 x i1>, i64 1, align 32
-!CHECK:  %2 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %3 = alloca <2 x double>, i64 1, align 16
-!CHECK:  %4 = load <256 x i1>, ptr %1, align 32
-!CHECK:  %5 = load <2 x double>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %2, align 64
-!CHECK:  %7 = bitcast <2 x double> %5 to <16 x i8>
-!CHECK:  %8 = call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> %6, <256 x i1> %4, <16 x i8> %7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %8, ptr %2, align 64
+!LLVMIR:  %1 = alloca <256 x i1>, i64 1, align 32
+!LLVMIR:  %2 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %3 = alloca <2 x double>, i64 1, align 16
+!LLVMIR:  %4 = load <256 x i1>, ptr %1, align 32
+!LLVMIR:  %5 = load <2 x double>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %2, align 64
+!LLVMIR:  %7 = bitcast <2 x double> %5 to <16 x i8>
+!LLVMIR:  %8 = call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> %6, <256 x i1> %4, <16 x i8> %7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %8, ptr %2, align 64
 
       subroutine test_pmxvf64gernp_u1_def()
       use, intrinsic :: mma
@@ -897,14 +897,14 @@ subroutine test_pmxvf64gernp_u1_def()
       end subroutine test_pmxvf64gernp_u1_def
 
 !CHECK-LABEL: @test_pmxvf64gernp_u1_def_
-!CHECK:  %1 = alloca <256 x i1>, i64 1, align 32
-!CHECK:  %2 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <256 x i1>, ptr %1, align 32
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %2, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvf64gernp(<512 x i1> %6, <256 x i1> %4, <16 x i8> %5, i32 7, i32 2)
-!CHECK:  store <512 x i1> %7, ptr %2, align 64
+!LLVMIR:  %1 = alloca <256 x i1>, i64 1, align 32
+!LLVMIR:  %2 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <256 x i1>, ptr %1, align 32
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %2, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvf64gernp(<512 x i1> %6, <256 x i1> %4, <16 x i8> %5, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %7, ptr %2, align 64
 
       subroutine test_pmxvf64gernp_u1_non_def()
       use, intrinsic :: mma
@@ -916,14 +916,14 @@ subroutine test_pmxvf64gernp_u1_non_def()
       end subroutine test_pmxvf64gernp_u1_non_def
 
 !CHECK-LABEL: @test_pmxvf64gernp_u1_non_def_
-!CHECK:  %1 = alloca <256 x i1>, i64 1, align 32
-!CHECK:  %2 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <256 x i1>, ptr %1, align 32
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %2, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvf64gernp(<512 x i1> %6, <256 x i1> %4, <16 x i8> %5, i32 7, i32 2)
-!CHECK:  store <512 x i1> %7, ptr %2, align 64
+!LLVMIR:  %1 = alloca <256 x i1>, i64 1, align 32
+!LLVMIR:  %2 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <256 x i1>, ptr %1, align 32
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %2, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvf64gernp(<512 x i1> %6, <256 x i1> %4, <16 x i8> %5, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %7, ptr %2, align 64
 
       subroutine test_pmxvf64gernp_r8_def()
       use, intrinsic :: mma
@@ -935,15 +935,15 @@ subroutine test_pmxvf64gernp_r8_def()
       end subroutine test_pmxvf64gernp_r8_def
 
 !CHECK-LABEL: @test_pmxvf64gernp_r8_def_
-!CHECK:  %1 = alloca <256 x i1>, i64 1, align 32
-!CHECK:  %2 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %3 = alloca <2 x double>, i64 1, align 16
-!CHECK:  %4 = load <256 x i1>, ptr %1, align 32
-!CHECK:  %5 = load <2 x double>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %2, align 64
-!CHECK:  %7 = bitcast <2 x double> %5 to <16 x i8>
-!CHECK:  %8 = call <512 x i1> @llvm.ppc.mma.pmxvf64gernp(<512 x i1> %6, <256 x i1> %4, <16 x i8> %7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %8, ptr %2, align 64
+!LLVMIR:  %1 = alloca <256 x i1>, i64 1, align 32
+!LLVMIR:  %2 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %3 = alloca <2 x double>, i64 1, align 16
+!LLVMIR:  %4 = load <256 x i1>, ptr %1, align 32
+!LLVMIR:  %5 = load <2 x double>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %2, align 64
+!LLVMIR:  %7 = bitcast <2 x double> %5 to <16 x i8>
+!LLVMIR:  %8 = call <512 x i1> @llvm.ppc.mma.pmxvf64gernp(<512 x i1> %6, <256 x i1> %4, <16 x i8> %7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %8, ptr %2, align 64
 
       subroutine test_pmxvf64gernp_r8_non_def()
       use, intrinsic :: mma
@@ -955,15 +955,15 @@ subroutine test_pmxvf64gernp_r8_non_def()
       end subroutine test_pmxvf64gernp_r8_non_def
 
 !CHECK-LABEL: @test_pmxvf64gernp_r8_non_def_
-!CHECK:  %1 = alloca <256 x i1>, i64 1, align 32
-!CHECK:  %2 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %3 = alloca <2 x double>, i64 1, align 16
-!CHECK:  %4 = load <256 x i1>, ptr %1, align 32
-!CHECK:  %5 = load <2 x double>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %2, align 64
-!CHECK:  %7 = bitcast <2 x double> %5 to <16 x i8>
-!CHECK:  %8 = call <512 x i1> @llvm.ppc.mma.pmxvf64gernp(<512 x i1> %6, <256 x i1> %4, <16 x i8> %7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %8, ptr %2, align 64
+!LLVMIR:  %1 = alloca <256 x i1>, i64 1, align 32
+!LLVMIR:  %2 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %3 = alloca <2 x double>, i64 1, align 16
+!LLVMIR:  %4 = load <256 x i1>, ptr %1, align 32
+!LLVMIR:  %5 = load <2 x double>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %2, align 64
+!LLVMIR:  %7 = bitcast <2 x double> %5 to <16 x i8>
+!LLVMIR:  %8 = call <512 x i1> @llvm.ppc.mma.pmxvf64gernp(<512 x i1> %6, <256 x i1> %4, <16 x i8> %7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %8, ptr %2, align 64
 
       subroutine test_pmxvf64gerpn_u1_def()
       use, intrinsic :: mma
@@ -975,14 +975,14 @@ subroutine test_pmxvf64gerpn_u1_def()
       end subroutine test_pmxvf64gerpn_u1_def
 
 !CHECK-LABEL: @test_pmxvf64gerpn_u1_def_
-!CHECK:  %1 = alloca <256 x i1>, i64 1, align 32
-!CHECK:  %2 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <256 x i1>, ptr %1, align 32
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %2, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvf64gerpn(<512 x i1> %6, <256 x i1> %4, <16 x i8> %5, i32 7, i32 2)
-!CHECK:  store <512 x i1> %7, ptr %2, align 64
+!LLVMIR:  %1 = alloca <256 x i1>, i64 1, align 32
+!LLVMIR:  %2 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <256 x i1>, ptr %1, align 32
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %2, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvf64gerpn(<512 x i1> %6, <256 x i1> %4, <16 x i8> %5, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %7, ptr %2, align 64
 
       subroutine test_pmxvf64gerpn_u1_non_def()
       use, intrinsic :: mma
@@ -994,14 +994,14 @@ subroutine test_pmxvf64gerpn_u1_non_def()
       end subroutine test_pmxvf64gerpn_u1_non_def
 
 !CHECK-LABEL: @test_pmxvf64gerpn_u1_non_def_
-!CHECK:  %1 = alloca <256 x i1>, i64 1, align 32
-!CHECK:  %2 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <256 x i1>, ptr %1, align 32
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %2, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvf64gerpn(<512 x i1> %6, <256 x i1> %4, <16 x i8> %5, i32 7, i32 2)
-!CHECK:  store <512 x i1> %7, ptr %2, align 64
+!LLVMIR:  %1 = alloca <256 x i1>, i64 1, align 32
+!LLVMIR:  %2 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <256 x i1>, ptr %1, align 32
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %2, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvf64gerpn(<512 x i1> %6, <256 x i1> %4, <16 x i8> %5, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %7, ptr %2, align 64
 
       subroutine test_pmxvf64gerpn_r8_def()
       use, intrinsic :: mma
@@ -1013,15 +1013,15 @@ subroutine test_pmxvf64gerpn_r8_def()
       end subroutine test_pmxvf64gerpn_r8_def
 
 !CHECK-LABEL: @test_pmxvf64gerpn_r8_def_
-!CHECK:  %1 = alloca <256 x i1>, i64 1, align 32
-!CHECK:  %2 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %3 = alloca <2 x double>, i64 1, align 16
-!CHECK:  %4 = load <256 x i1>, ptr %1, align 32
-!CHECK:  %5 = load <2 x double>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %2, align 64
-!CHECK:  %7 = bitcast <2 x double> %5 to <16 x i8>
-!CHECK:  %8 = call <512 x i1> @llvm.ppc.mma.pmxvf64gerpn(<512 x i1> %6, <256 x i1> %4, <16 x i8> %7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %8, ptr %2, align 64
+!LLVMIR:  %1 = alloca <256 x i1>, i64 1, align 32
+!LLVMIR:  %2 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %3 = alloca <2 x double>, i64 1, align 16
+!LLVMIR:  %4 = load <256 x i1>, ptr %1, align 32
+!LLVMIR:  %5 = load <2 x double>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %2, align 64
+!LLVMIR:  %7 = bitcast <2 x double> %5 to <16 x i8>
+!LLVMIR:  %8 = call <512 x i1> @llvm.ppc.mma.pmxvf64gerpn(<512 x i1> %6, <256 x i1> %4, <16 x i8> %7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %8, ptr %2, align 64
 
       subroutine test_pmxvf64gerpn_r8_non_def()
       use, intrinsic :: mma
@@ -1033,15 +1033,15 @@ subroutine test_pmxvf64gerpn_r8_non_def()
       end subroutine test_pmxvf64gerpn_r8_non_def
 
 !CHECK-LABEL: @test_pmxvf64gerpn_r8_non_def_
-!CHECK:  %1 = alloca <256 x i1>, i64 1, align 32
-!CHECK:  %2 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %3 = alloca <2 x double>, i64 1, align 16
-!CHECK:  %4 = load <256 x i1>, ptr %1, align 32
-!CHECK:  %5 = load <2 x double>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %2, align 64
-!CHECK:  %7 = bitcast <2 x double> %5 to <16 x i8>
-!CHECK:  %8 = call <512 x i1> @llvm.ppc.mma.pmxvf64gerpn(<512 x i1> %6, <256 x i1> %4, <16 x i8> %7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %8, ptr %2, align 64
+!LLVMIR:  %1 = alloca <256 x i1>, i64 1, align 32
+!LLVMIR:  %2 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %3 = alloca <2 x double>, i64 1, align 16
+!LLVMIR:  %4 = load <256 x i1>, ptr %1, align 32
+!LLVMIR:  %5 = load <2 x double>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %2, align 64
+!LLVMIR:  %7 = bitcast <2 x double> %5 to <16 x i8>
+!LLVMIR:  %8 = call <512 x i1> @llvm.ppc.mma.pmxvf64gerpn(<512 x i1> %6, <256 x i1> %4, <16 x i8> %7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %8, ptr %2, align 64
 
       subroutine test_pmxvf64gerpp_u1_def()
       use, intrinsic :: mma
@@ -1053,14 +1053,14 @@ subroutine test_pmxvf64gerpp_u1_def()
       end subroutine test_pmxvf64gerpp_u1_def
 
 !CHECK-LABEL: @test_pmxvf64gerpp_u1_def_
-!CHECK:  %1 = alloca <256 x i1>, i64 1, align 32
-!CHECK:  %2 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <256 x i1>, ptr %1, align 32
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %2, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvf64gerpp(<512 x i1> %6, <256 x i1> %4, <16 x i8> %5, i32 7, i32 2)
-!CHECK:  store <512 x i1> %7, ptr %2, align 64
+!LLVMIR:  %1 = alloca <256 x i1>, i64 1, align 32
+!LLVMIR:  %2 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <256 x i1>, ptr %1, align 32
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %2, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvf64gerpp(<512 x i1> %6, <256 x i1> %4, <16 x i8> %5, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %7, ptr %2, align 64
 
       subroutine test_pmxvf64gerpp_u1_non_def()
       use, intrinsic :: mma
@@ -1072,14 +1072,14 @@ subroutine test_pmxvf64gerpp_u1_non_def()
       end subroutine test_pmxvf64gerpp_u1_non_def
 
 !CHECK-LABEL: @test_pmxvf64gerpp_u1_non_def_
-!CHECK:  %1 = alloca <256 x i1>, i64 1, align 32
-!CHECK:  %2 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <256 x i1>, ptr %1, align 32
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %2, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvf64gerpp(<512 x i1> %6, <256 x i1> %4, <16 x i8> %5, i32 7, i32 2)
-!CHECK:  store <512 x i1> %7, ptr %2, align 64
+!LLVMIR:  %1 = alloca <256 x i1>, i64 1, align 32
+!LLVMIR:  %2 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <256 x i1>, ptr %1, align 32
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %2, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvf64gerpp(<512 x i1> %6, <256 x i1> %4, <16 x i8> %5, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %7, ptr %2, align 64
 
       subroutine test_pmxvf64gerpp_r8_def()
       use, intrinsic :: mma
@@ -1091,15 +1091,15 @@ subroutine test_pmxvf64gerpp_r8_def()
       end subroutine test_pmxvf64gerpp_r8_def
 
 !CHECK-LABEL: @test_pmxvf64gerpp_r8_def_
-!CHECK:  %1 = alloca <256 x i1>, i64 1, align 32
-!CHECK:  %2 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %3 = alloca <2 x double>, i64 1, align 16
-!CHECK:  %4 = load <256 x i1>, ptr %1, align 32
-!CHECK:  %5 = load <2 x double>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %2, align 64
-!CHECK:  %7 = bitcast <2 x double> %5 to <16 x i8>
-!CHECK:  %8 = call <512 x i1> @llvm.ppc.mma.pmxvf64gerpp(<512 x i1> %6, <256 x i1> %4, <16 x i8> %7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %8, ptr %2, align 64
+!LLVMIR:  %1 = alloca <256 x i1>, i64 1, align 32
+!LLVMIR:  %2 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %3 = alloca <2 x double>, i64 1, align 16
+!LLVMIR:  %4 = load <256 x i1>, ptr %1, align 32
+!LLVMIR:  %5 = load <2 x double>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %2, align 64
+!LLVMIR:  %7 = bitcast <2 x double> %5 to <16 x i8>
+!LLVMIR:  %8 = call <512 x i1> @llvm.ppc.mma.pmxvf64gerpp(<512 x i1> %6, <256 x i1> %4, <16 x i8> %7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %8, ptr %2, align 64
 
       subroutine test_pmxvf64gerpp_r8_non_def()
       use, intrinsic :: mma
@@ -1111,15 +1111,15 @@ subroutine test_pmxvf64gerpp_r8_non_def()
       end subroutine test_pmxvf64gerpp_r8_non_def
 
 !CHECK-LABEL: @test_pmxvf64gerpp_r8_non_def_
-!CHECK:  %1 = alloca <256 x i1>, i64 1, align 32
-!CHECK:  %2 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %3 = alloca <2 x double>, i64 1, align 16
-!CHECK:  %4 = load <256 x i1>, ptr %1, align 32
-!CHECK:  %5 = load <2 x double>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %2, align 64
-!CHECK:  %7 = bitcast <2 x double> %5 to <16 x i8>
-!CHECK:  %8 = call <512 x i1> @llvm.ppc.mma.pmxvf64gerpp(<512 x i1> %6, <256 x i1> %4, <16 x i8> %7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %8, ptr %2, align 64
+!LLVMIR:  %1 = alloca <256 x i1>, i64 1, align 32
+!LLVMIR:  %2 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %3 = alloca <2 x double>, i64 1, align 16
+!LLVMIR:  %4 = load <256 x i1>, ptr %1, align 32
+!LLVMIR:  %5 = load <2 x double>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %2, align 64
+!LLVMIR:  %7 = bitcast <2 x double> %5 to <16 x i8>
+!LLVMIR:  %8 = call <512 x i1> @llvm.ppc.mma.pmxvf64gerpp(<512 x i1> %6, <256 x i1> %4, <16 x i8> %7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %8, ptr %2, align 64
 
       subroutine test_pmxvi16ger2_u1_def()
       use, intrinsic :: mma
@@ -1130,13 +1130,13 @@ subroutine test_pmxvi16ger2_u1_def()
       end subroutine test_pmxvi16ger2_u1_def
 
 !CHECK-LABEL: @test_pmxvi16ger2_u1_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = call <512 x i1> @llvm.ppc.mma.pmxvi16ger2(<16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %6, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = call <512 x i1> @llvm.ppc.mma.pmxvi16ger2(<16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %6, ptr %1, align 64
 
       subroutine test_pmxvi16ger2_u1_non_def()
       use, intrinsic :: mma
@@ -1147,13 +1147,13 @@ subroutine test_pmxvi16ger2_u1_non_def()
       end subroutine test_pmxvi16ger2_u1_non_def
 
 !CHECK-LABEL: @test_pmxvi16ger2_u1_non_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = call <512 x i1> @llvm.ppc.mma.pmxvi16ger2(<16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %6, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = call <512 x i1> @llvm.ppc.mma.pmxvi16ger2(<16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %6, ptr %1, align 64
 
       subroutine test_pmxvi16ger2_i2_def()
       use, intrinsic :: mma
@@ -1164,15 +1164,15 @@ subroutine test_pmxvi16ger2_i2_def()
       end subroutine test_pmxvi16ger2_i2_def
 
 !CHECK-LABEL: @test_pmxvi16ger2_i2_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <8 x i16>, i64 1, align 16
-!CHECK:  %3 = alloca <8 x i16>, i64 1, align 16
-!CHECK:  %4 = load <8 x i16>, ptr %2, align 16
-!CHECK:  %5 = load <8 x i16>, ptr %3, align 16
-!CHECK:  %6 = bitcast <8 x i16> %4 to <16 x i8>
-!CHECK:  %7 = bitcast <8 x i16> %5 to <16 x i8>
-!CHECK:  %8 = call <512 x i1> @llvm.ppc.mma.pmxvi16ger2(<16 x i8> %6, <16 x i8> %7, i32 7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %8, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <8 x i16>, i64 1, align 16
+!LLVMIR:  %3 = alloca <8 x i16>, i64 1, align 16
+!LLVMIR:  %4 = load <8 x i16>, ptr %2, align 16
+!LLVMIR:  %5 = load <8 x i16>, ptr %3, align 16
+!LLVMIR:  %6 = bitcast <8 x i16> %4 to <16 x i8>
+!LLVMIR:  %7 = bitcast <8 x i16> %5 to <16 x i8>
+!LLVMIR:  %8 = call <512 x i1> @llvm.ppc.mma.pmxvi16ger2(<16 x i8> %6, <16 x i8> %7, i32 7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %8, ptr %1, align 64
 
       subroutine test_pmxvi16ger2_i2_non_def()
       use, intrinsic :: mma
@@ -1183,15 +1183,15 @@ subroutine test_pmxvi16ger2_i2_non_def()
       end subroutine test_pmxvi16ger2_i2_non_def
 
 !CHECK-LABEL: @test_pmxvi16ger2_i2_non_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <8 x i16>, i64 1, align 16
-!CHECK:  %3 = alloca <8 x i16>, i64 1, align 16
-!CHECK:  %4 = load <8 x i16>, ptr %2, align 16
-!CHECK:  %5 = load <8 x i16>, ptr %3, align 16
-!CHECK:  %6 = bitcast <8 x i16> %4 to <16 x i8>
-!CHECK:  %7 = bitcast <8 x i16> %5 to <16 x i8>
-!CHECK:  %8 = call <512 x i1> @llvm.ppc.mma.pmxvi16ger2(<16 x i8> %6, <16 x i8> %7, i32 7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %8, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <8 x i16>, i64 1, align 16
+!LLVMIR:  %3 = alloca <8 x i16>, i64 1, align 16
+!LLVMIR:  %4 = load <8 x i16>, ptr %2, align 16
+!LLVMIR:  %5 = load <8 x i16>, ptr %3, align 16
+!LLVMIR:  %6 = bitcast <8 x i16> %4 to <16 x i8>
+!LLVMIR:  %7 = bitcast <8 x i16> %5 to <16 x i8>
+!LLVMIR:  %8 = call <512 x i1> @llvm.ppc.mma.pmxvi16ger2(<16 x i8> %6, <16 x i8> %7, i32 7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %8, ptr %1, align 64
 
       subroutine test_pmxvi16ger2pp_u1_def()
       use, intrinsic :: mma
@@ -1202,14 +1202,14 @@ subroutine test_pmxvi16ger2pp_u1_def()
       end subroutine test_pmxvi16ger2pp_u1_def
 
 !CHECK-LABEL: @test_pmxvi16ger2pp_u1_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvi16ger2pp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %7, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvi16ger2pp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %7, ptr %1, align 64
 
       subroutine test_pmxvi16ger2pp_u1_non_def()
       use, intrinsic :: mma
@@ -1220,14 +1220,14 @@ subroutine test_pmxvi16ger2pp_u1_non_def()
       end subroutine test_pmxvi16ger2pp_u1_non_def
 
 !CHECK-LABEL: @test_pmxvi16ger2pp_u1_non_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvi16ger2pp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %7, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvi16ger2pp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %7, ptr %1, align 64
 
       subroutine test_pmxvi16ger2pp_i2_def()
       use, intrinsic :: mma
@@ -1238,16 +1238,16 @@ subroutine test_pmxvi16ger2pp_i2_def()
       end subroutine test_pmxvi16ger2pp_i2_def
 
 !CHECK-LABEL: @test_pmxvi16ger2pp_i2_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <8 x i16>, i64 1, align 16
-!CHECK:  %3 = alloca <8 x i16>, i64 1, align 16
-!CHECK:  %4 = load <8 x i16>, ptr %2, align 16
-!CHECK:  %5 = load <8 x i16>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = bitcast <8 x i16> %4 to <16 x i8>
-!CHECK:  %8 = bitcast <8 x i16> %5 to <16 x i8>
-!CHECK:  %9 = call <512 x i1> @llvm.ppc.mma.pmxvi16ger2pp(<512 x i1> %6, <16 x i8> %7, <16 x i8> %8, i32 7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %9, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <8 x i16>, i64 1, align 16
+!LLVMIR:  %3 = alloca <8 x i16>, i64 1, align 16
+!LLVMIR:  %4 = load <8 x i16>, ptr %2, align 16
+!LLVMIR:  %5 = load <8 x i16>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = bitcast <8 x i16> %4 to <16 x i8>
+!LLVMIR:  %8 = bitcast <8 x i16> %5 to <16 x i8>
+!LLVMIR:  %9 = call <512 x i1> @llvm.ppc.mma.pmxvi16ger2pp(<512 x i1> %6, <16 x i8> %7, <16 x i8> %8, i32 7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %9, ptr %1, align 64
 
       subroutine test_pmxvi16ger2pp_i2_non_def()
       use, intrinsic :: mma
@@ -1258,16 +1258,16 @@ subroutine test_pmxvi16ger2pp_i2_non_def()
       end subroutine test_pmxvi16ger2pp_i2_non_def
 
 !CHECK-LABEL: @test_pmxvi16ger2pp_i2_non_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <8 x i16>, i64 1, align 16
-!CHECK:  %3 = alloca <8 x i16>, i64 1, align 16
-!CHECK:  %4 = load <8 x i16>, ptr %2, align 16
-!CHECK:  %5 = load <8 x i16>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = bitcast <8 x i16> %4 to <16 x i8>
-!CHECK:  %8 = bitcast <8 x i16> %5 to <16 x i8>
-!CHECK:  %9 = call <512 x i1> @llvm.ppc.mma.pmxvi16ger2pp(<512 x i1> %6, <16 x i8> %7, <16 x i8> %8, i32 7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %9, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <8 x i16>, i64 1, align 16
+!LLVMIR:  %3 = alloca <8 x i16>, i64 1, align 16
+!LLVMIR:  %4 = load <8 x i16>, ptr %2, align 16
+!LLVMIR:  %5 = load <8 x i16>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = bitcast <8 x i16> %4 to <16 x i8>
+!LLVMIR:  %8 = bitcast <8 x i16> %5 to <16 x i8>
+!LLVMIR:  %9 = call <512 x i1> @llvm.ppc.mma.pmxvi16ger2pp(<512 x i1> %6, <16 x i8> %7, <16 x i8> %8, i32 7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %9, ptr %1, align 64
 
       subroutine test_pmxvi16ger2s_u1_def()
       use, intrinsic :: mma
@@ -1278,13 +1278,13 @@ subroutine test_pmxvi16ger2s_u1_def()
       end subroutine test_pmxvi16ger2s_u1_def
 
 !CHECK-LABEL: @test_pmxvi16ger2s_u1_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = call <512 x i1> @llvm.ppc.mma.pmxvi16ger2s(<16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %6, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = call <512 x i1> @llvm.ppc.mma.pmxvi16ger2s(<16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %6, ptr %1, align 64
 
       subroutine test_pmxvi16ger2s_u1_non_def()
       use, intrinsic :: mma
@@ -1295,13 +1295,13 @@ subroutine test_pmxvi16ger2s_u1_non_def()
       end subroutine test_pmxvi16ger2s_u1_non_def
 
 !CHECK-LABEL: @test_pmxvi16ger2s_u1_non_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = call <512 x i1> @llvm.ppc.mma.pmxvi16ger2s(<16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %6, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = call <512 x i1> @llvm.ppc.mma.pmxvi16ger2s(<16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %6, ptr %1, align 64
 
       subroutine test_pmxvi16ger2s_i2_def()
       use, intrinsic :: mma
@@ -1312,15 +1312,15 @@ subroutine test_pmxvi16ger2s_i2_def()
       end subroutine test_pmxvi16ger2s_i2_def
 
 !CHECK-LABEL: @test_pmxvi16ger2s_i2_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <8 x i16>, i64 1, align 16
-!CHECK:  %3 = alloca <8 x i16>, i64 1, align 16
-!CHECK:  %4 = load <8 x i16>, ptr %2, align 16
-!CHECK:  %5 = load <8 x i16>, ptr %3, align 16
-!CHECK:  %6 = bitcast <8 x i16> %4 to <16 x i8>
-!CHECK:  %7 = bitcast <8 x i16> %5 to <16 x i8>
-!CHECK:  %8 = call <512 x i1> @llvm.ppc.mma.pmxvi16ger2s(<16 x i8> %6, <16 x i8> %7, i32 7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %8, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <8 x i16>, i64 1, align 16
+!LLVMIR:  %3 = alloca <8 x i16>, i64 1, align 16
+!LLVMIR:  %4 = load <8 x i16>, ptr %2, align 16
+!LLVMIR:  %5 = load <8 x i16>, ptr %3, align 16
+!LLVMIR:  %6 = bitcast <8 x i16> %4 to <16 x i8>
+!LLVMIR:  %7 = bitcast <8 x i16> %5 to <16 x i8>
+!LLVMIR:  %8 = call <512 x i1> @llvm.ppc.mma.pmxvi16ger2s(<16 x i8> %6, <16 x i8> %7, i32 7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %8, ptr %1, align 64
 
       subroutine test_pmxvi16ger2s_i2_non_def()
       use, intrinsic :: mma
@@ -1331,15 +1331,15 @@ subroutine test_pmxvi16ger2s_i2_non_def()
       end subroutine test_pmxvi16ger2s_i2_non_def
 
 !CHECK-LABEL: @test_pmxvi16ger2s_i2_non_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <8 x i16>, i64 1, align 16
-!CHECK:  %3 = alloca <8 x i16>, i64 1, align 16
-!CHECK:  %4 = load <8 x i16>, ptr %2, align 16
-!CHECK:  %5 = load <8 x i16>, ptr %3, align 16
-!CHECK:  %6 = bitcast <8 x i16> %4 to <16 x i8>
-!CHECK:  %7 = bitcast <8 x i16> %5 to <16 x i8>
-!CHECK:  %8 = call <512 x i1> @llvm.ppc.mma.pmxvi16ger2s(<16 x i8> %6, <16 x i8> %7, i32 7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %8, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <8 x i16>, i64 1, align 16
+!LLVMIR:  %3 = alloca <8 x i16>, i64 1, align 16
+!LLVMIR:  %4 = load <8 x i16>, ptr %2, align 16
+!LLVMIR:  %5 = load <8 x i16>, ptr %3, align 16
+!LLVMIR:  %6 = bitcast <8 x i16> %4 to <16 x i8>
+!LLVMIR:  %7 = bitcast <8 x i16> %5 to <16 x i8>
+!LLVMIR:  %8 = call <512 x i1> @llvm.ppc.mma.pmxvi16ger2s(<16 x i8> %6, <16 x i8> %7, i32 7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %8, ptr %1, align 64
 
       subroutine test_pmxvi16ger2spp_u1_def()
       use, intrinsic :: mma
@@ -1350,14 +1350,14 @@ subroutine test_pmxvi16ger2spp_u1_def()
       end subroutine test_pmxvi16ger2spp_u1_def
 
 !CHECK-LABEL: @test_pmxvi16ger2spp_u1_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvi16ger2spp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %7, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvi16ger2spp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %7, ptr %1, align 64
 
       subroutine test_pmxvi16ger2spp_u1_non_def()
       use, intrinsic :: mma
@@ -1368,14 +1368,14 @@ subroutine test_pmxvi16ger2spp_u1_non_def()
       end subroutine test_pmxvi16ger2spp_u1_non_def
 
 !CHECK-LABEL: @test_pmxvi16ger2spp_u1_non_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvi16ger2spp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %7, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvi16ger2spp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %7, ptr %1, align 64
 
       subroutine test_pmxvi16ger2spp_i2_def()
       use, intrinsic :: mma
@@ -1386,16 +1386,16 @@ subroutine test_pmxvi16ger2spp_i2_def()
       end subroutine test_pmxvi16ger2spp_i2_def
 
 !CHECK-LABEL: @test_pmxvi16ger2spp_i2_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <8 x i16>, i64 1, align 16
-!CHECK:  %3 = alloca <8 x i16>, i64 1, align 16
-!CHECK:  %4 = load <8 x i16>, ptr %2, align 16
-!CHECK:  %5 = load <8 x i16>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = bitcast <8 x i16> %4 to <16 x i8>
-!CHECK:  %8 = bitcast <8 x i16> %5 to <16 x i8>
-!CHECK:  %9 = call <512 x i1> @llvm.ppc.mma.pmxvi16ger2spp(<512 x i1> %6, <16 x i8> %7, <16 x i8> %8, i32 7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %9, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <8 x i16>, i64 1, align 16
+!LLVMIR:  %3 = alloca <8 x i16>, i64 1, align 16
+!LLVMIR:  %4 = load <8 x i16>, ptr %2, align 16
+!LLVMIR:  %5 = load <8 x i16>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = bitcast <8 x i16> %4 to <16 x i8>
+!LLVMIR:  %8 = bitcast <8 x i16> %5 to <16 x i8>
+!LLVMIR:  %9 = call <512 x i1> @llvm.ppc.mma.pmxvi16ger2spp(<512 x i1> %6, <16 x i8> %7, <16 x i8> %8, i32 7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %9, ptr %1, align 64
 
       subroutine test_pmxvi16ger2spp_i2_non_def()
       use, intrinsic :: mma
@@ -1406,16 +1406,16 @@ subroutine test_pmxvi16ger2spp_i2_non_def()
       end subroutine test_pmxvi16ger2spp_i2_non_def
 
 !CHECK-LABEL: @test_pmxvi16ger2spp_i2_non_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <8 x i16>, i64 1, align 16
-!CHECK:  %3 = alloca <8 x i16>, i64 1, align 16
-!CHECK:  %4 = load <8 x i16>, ptr %2, align 16
-!CHECK:  %5 = load <8 x i16>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = bitcast <8 x i16> %4 to <16 x i8>
-!CHECK:  %8 = bitcast <8 x i16> %5 to <16 x i8>
-!CHECK:  %9 = call <512 x i1> @llvm.ppc.mma.pmxvi16ger2spp(<512 x i1> %6, <16 x i8> %7, <16 x i8> %8, i32 7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %9, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <8 x i16>, i64 1, align 16
+!LLVMIR:  %3 = alloca <8 x i16>, i64 1, align 16
+!LLVMIR:  %4 = load <8 x i16>, ptr %2, align 16
+!LLVMIR:  %5 = load <8 x i16>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = bitcast <8 x i16> %4 to <16 x i8>
+!LLVMIR:  %8 = bitcast <8 x i16> %5 to <16 x i8>
+!LLVMIR:  %9 = call <512 x i1> @llvm.ppc.mma.pmxvi16ger2spp(<512 x i1> %6, <16 x i8> %7, <16 x i8> %8, i32 7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %9, ptr %1, align 64
 
 
       subroutine test_pmxvi4ger8_def()
@@ -1427,13 +1427,13 @@ subroutine test_pmxvi4ger8_def()
       end subroutine test_pmxvi4ger8_def
 
 !CHECK-LABEL: @test_pmxvi4ger8_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = call <512 x i1> @llvm.ppc.mma.pmxvi4ger8(<16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %6, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = call <512 x i1> @llvm.ppc.mma.pmxvi4ger8(<16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %6, ptr %1, align 64
 
       subroutine test_pmxvi4ger8_non_def()
       use, intrinsic :: mma
@@ -1444,13 +1444,13 @@ subroutine test_pmxvi4ger8_non_def()
       end subroutine test_pmxvi4ger8_non_def
 
 !CHECK-LABEL: @test_pmxvi4ger8_non_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = call <512 x i1> @llvm.ppc.mma.pmxvi4ger8(<16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %6, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = call <512 x i1> @llvm.ppc.mma.pmxvi4ger8(<16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %6, ptr %1, align 64
 
       subroutine test_pmxvi4ger8pp_def()
       use, intrinsic :: mma
@@ -1461,14 +1461,14 @@ subroutine test_pmxvi4ger8pp_def()
       end subroutine test_pmxvi4ger8pp_def
 
 !CHECK-LABEL: @test_pmxvi4ger8pp_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvi4ger8pp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %7, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvi4ger8pp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %7, ptr %1, align 64
 
       subroutine test_pmxvi4ger8pp_non_def()
       use, intrinsic :: mma
@@ -1479,14 +1479,14 @@ subroutine test_pmxvi4ger8pp_non_def()
       end subroutine test_pmxvi4ger8pp_non_def
 
 !CHECK-LABEL: @test_pmxvi4ger8pp_non_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvi4ger8pp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %7, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvi4ger8pp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %7, ptr %1, align 64
 
       subroutine test_pmxvi8ger4_u1_def()
       use, intrinsic :: mma
@@ -1497,13 +1497,13 @@ subroutine test_pmxvi8ger4_u1_def()
       end subroutine test_pmxvi8ger4_u1_def
 
 !CHECK-LABEL: @test_pmxvi8ger4_u1_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = call <512 x i1> @llvm.ppc.mma.pmxvi8ger4(<16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %6, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = call <512 x i1> @llvm.ppc.mma.pmxvi8ger4(<16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %6, ptr %1, align 64
 
       subroutine test_pmxvi8ger4_u1_non_def()
       use, intrinsic :: mma
@@ -1514,13 +1514,13 @@ subroutine test_pmxvi8ger4_u1_non_def()
       end subroutine test_pmxvi8ger4_u1_non_def
 
 !CHECK-LABEL: @test_pmxvi8ger4_u1_non_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = call <512 x i1> @llvm.ppc.mma.pmxvi8ger4(<16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %6, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = call <512 x i1> @llvm.ppc.mma.pmxvi8ger4(<16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %6, ptr %1, align 64
 
       subroutine test_pmxvi8ger4_i1_def()
       use, intrinsic :: mma
@@ -1531,13 +1531,13 @@ subroutine test_pmxvi8ger4_i1_def()
       end subroutine test_pmxvi8ger4_i1_def
 
 !CHECK-LABEL: @test_pmxvi8ger4_i1_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = call <512 x i1> @llvm.ppc.mma.pmxvi8ger4(<16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %6, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = call <512 x i1> @llvm.ppc.mma.pmxvi8ger4(<16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %6, ptr %1, align 64
 
       subroutine test_pmxvi8ger4_i1_non_def()
       use, intrinsic :: mma
@@ -1548,13 +1548,13 @@ subroutine test_pmxvi8ger4_i1_non_def()
       end subroutine test_pmxvi8ger4_i1_non_def
 
 !CHECK-LABEL: @test_pmxvi8ger4_i1_non_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = call <512 x i1> @llvm.ppc.mma.pmxvi8ger4(<16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %6, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = call <512 x i1> @llvm.ppc.mma.pmxvi8ger4(<16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %6, ptr %1, align 64
 
       subroutine test_pmxvi8ger4pp_u1_def()
       use, intrinsic :: mma
@@ -1565,14 +1565,14 @@ subroutine test_pmxvi8ger4pp_u1_def()
       end subroutine test_pmxvi8ger4pp_u1_def
 
 !CHECK-LABEL: @test_pmxvi8ger4pp_u1_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvi8ger4pp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %7, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvi8ger4pp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %7, ptr %1, align 64
 
       subroutine test_pmxvi8ger4pp_u1_non_def()
       use, intrinsic :: mma
@@ -1583,14 +1583,14 @@ subroutine test_pmxvi8ger4pp_u1_non_def()
       end subroutine test_pmxvi8ger4pp_u1_non_def
 
 !CHECK-LABEL: @test_pmxvi8ger4pp_u1_non_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvi8ger4pp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %7, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvi8ger4pp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %7, ptr %1, align 64
 
       subroutine test_pmxvi8ger4pp_i1_def()
       use, intrinsic :: mma
@@ -1601,14 +1601,14 @@ subroutine test_pmxvi8ger4pp_i1_def()
       end subroutine test_pmxvi8ger4pp_i1_def
 
 !CHECK-LABEL: @test_pmxvi8ger4pp_i1_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvi8ger4pp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %7, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvi8ger4pp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %7, ptr %1, align 64
 
       subroutine test_pmxvi8ger4pp_i1_non_def()
       use, intrinsic :: mma
@@ -1619,14 +1619,14 @@ subroutine test_pmxvi8ger4pp_i1_non_def()
       end subroutine test_pmxvi8ger4pp_i1_non_def
 
 !CHECK-LABEL: @test_pmxvi8ger4pp_i1_non_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvi8ger4pp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %7, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvi8ger4pp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %7, ptr %1, align 64
 
       subroutine test_pmxvi8ger4spp_u1_def()
       use, intrinsic :: mma
@@ -1637,14 +1637,14 @@ subroutine test_pmxvi8ger4spp_u1_def()
       end subroutine test_pmxvi8ger4spp_u1_def
 
 !CHECK-LABEL: @test_pmxvi8ger4spp_u1_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvi8ger4spp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %7, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvi8ger4spp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %7, ptr %1, align 64
 
       subroutine test_pmxvi8ger4spp_u1_non_def()
       use, intrinsic :: mma
@@ -1655,14 +1655,14 @@ subroutine test_pmxvi8ger4spp_u1_non_def()
       end subroutine test_pmxvi8ger4spp_u1_non_def
 
 !CHECK-LABEL: @test_pmxvi8ger4spp_u1_non_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvi8ger4spp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %7, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvi8ger4spp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %7, ptr %1, align 64
 
       subroutine test_pmxvi8ger4spp_i1_def()
       use, intrinsic :: mma
@@ -1673,14 +1673,14 @@ subroutine test_pmxvi8ger4spp_i1_def()
       end subroutine test_pmxvi8ger4spp_i1_def
 
 !CHECK-LABEL: @test_pmxvi8ger4spp_i1_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvi8ger4spp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %7, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvi8ger4spp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %7, ptr %1, align 64
 
       subroutine test_pmxvi8ger4spp_i1_non_def()
       use, intrinsic :: mma
@@ -1691,11 +1691,11 @@ subroutine test_pmxvi8ger4spp_i1_non_def()
       end subroutine test_pmxvi8ger4spp_i1_non_def
 
 !CHECK-LABEL: @test_pmxvi8ger4spp_i1_non_def_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvi8ger4spp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
-!CHECK:  store <512 x i1> %7, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.pmxvi8ger4spp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
+!LLVMIR:  store <512 x i1> %7, ptr %1, align 64

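For reference, every test updated in this patch follows the same conversion visible in the hunks above: the RUN line gains -flang-experimental-hlfir and the FileCheck prefix switches from CHECK to LLVMIR, while the expected LLVM IR is carried over largely unchanged. A minimal sketch of the resulting convention follows; the subroutine and variable names are illustrative only (not part of the patch), and the LLVMIR lines mirror the xvi4ger8 expectations shown later in this diff:

! RUN: %flang_fc1 -flang-experimental-hlfir -triple powerpc64le-unknown-unknown -target-cpu pwr10 -emit-llvm %s -o - | FileCheck --check-prefixes="LLVMIR" %s
! REQUIRES: target=powerpc{{.*}}

      subroutine test_sketch()
      use, intrinsic :: mma
      implicit none
      vector(unsigned(1)) vu10, vu11   ! two 16-byte vector operands
      __vector_quad :: cq              ! 512-bit accumulator
      call mma_xvi4ger8(cq, vu10, vu11)
      end subroutine test_sketch

!CHECK-LABEL: @test_sketch_
!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
!LLVMIR:  %6 = call <512 x i1> @llvm.ppc.mma.xvi4ger8(<16 x i8> %4, <16 x i8> %5)
!LLVMIR:  store <512 x i1> %6, ptr %1, align 64
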
diff --git a/flang/test/Lower/PowerPC/ppc-mma-outer-product-2.f90 b/flang/test/Lower/PowerPC/ppc-mma-outer-product-2.f90
index 778d58a745be9d9..3ef17b2f963fc37 100644
--- a/flang/test/Lower/PowerPC/ppc-mma-outer-product-2.f90
+++ b/flang/test/Lower/PowerPC/ppc-mma-outer-product-2.f90
@@ -1,4 +1,4 @@
-! RUN: %flang_fc1 -triple powerpc64le-unknown-unknown -target-cpu pwr10 -emit-llvm %s -o - | FileCheck --check-prefixes="CHECK" %s
+! RUN: %flang_fc1 -flang-experimental-hlfir -triple powerpc64le-unknown-unknown -target-cpu pwr10 -emit-llvm %s -o - | FileCheck --check-prefixes="LLVMIR" %s
 ! REQUIRES: target=powerpc{{.*}}
 
       subroutine test_xvbf16ger2()
@@ -10,13 +10,13 @@ subroutine test_xvbf16ger2()
       end subroutine test_xvbf16ger2
 
 !CHECK-LABEL: @test_xvbf16ger2_
-!CHECK:   %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:   %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:   %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:   %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:   %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:   %6 = call <512 x i1> @llvm.ppc.mma.xvbf16ger2(<16 x i8> %4, <16 x i8> %5)
-!CHECK:   store <512 x i1> %6, ptr %1, align 64
+!LLVMIR:   %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:   %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:   %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:   %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:   %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:   %6 = call <512 x i1> @llvm.ppc.mma.xvbf16ger2(<16 x i8> %4, <16 x i8> %5)
+!LLVMIR:   store <512 x i1> %6, ptr %1, align 64
 
 
       subroutine test_xvbf16ger2nn()
@@ -28,14 +28,14 @@ subroutine test_xvbf16ger2nn()
       end subroutine test_xvbf16ger2nn
 
 !CHECK-LABEL: @test_xvbf16ger2nn_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.xvbf16ger2nn(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5)
-!CHECK:  store <512 x i1> %7, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.xvbf16ger2nn(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5)
+!LLVMIR:  store <512 x i1> %7, ptr %1, align 64
 
       subroutine test_xvbf16ger2np()
       use, intrinsic :: mma
@@ -46,14 +46,14 @@ subroutine test_xvbf16ger2np()
       end subroutine test_xvbf16ger2np
 
 !CHECK-LABEL: @test_xvbf16ger2np_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.xvbf16ger2np(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5)
-!CHECK:  store <512 x i1> %7, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.xvbf16ger2np(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5)
+!LLVMIR:  store <512 x i1> %7, ptr %1, align 64
 
       subroutine test_xvbf16ger2pn()
       use, intrinsic :: mma
@@ -64,14 +64,14 @@ subroutine test_xvbf16ger2pn()
       end subroutine test_xvbf16ger2pn
 
 !CHECK-LABEL: @test_xvbf16ger2pn_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.xvbf16ger2pn(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5)
-!CHECK:  store <512 x i1> %7, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.xvbf16ger2pn(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5)
+!LLVMIR:  store <512 x i1> %7, ptr %1, align 64
 
       subroutine test_xvbf16ger2pp()
       use, intrinsic :: mma
@@ -82,14 +82,14 @@ subroutine test_xvbf16ger2pp()
       end subroutine test_xvbf16ger2pp
 
 !CHECK-LABEL: @test_xvbf16ger2pp_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.xvbf16ger2pp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5)
-!CHECK:  store <512 x i1> %7, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.xvbf16ger2pp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5)
+!LLVMIR:  store <512 x i1> %7, ptr %1, align 64
 
       subroutine test_xvf16ger2()
       use, intrinsic :: mma
@@ -100,13 +100,13 @@ subroutine test_xvf16ger2()
       end subroutine test_xvf16ger2
 
 !CHECK-LABEL: @test_xvf16ger2_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = call <512 x i1> @llvm.ppc.mma.xvf16ger2(<16 x i8> %4, <16 x i8> %5)
-!CHECK:  store <512 x i1> %6, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = call <512 x i1> @llvm.ppc.mma.xvf16ger2(<16 x i8> %4, <16 x i8> %5)
+!LLVMIR:  store <512 x i1> %6, ptr %1, align 64
 
       subroutine test_xvf16ger2nn()
       use, intrinsic :: mma
@@ -117,14 +117,14 @@ subroutine test_xvf16ger2nn()
       end subroutine test_xvf16ger2nn
 
 !CHECK-LABEL: @test_xvf16ger2nn_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.xvf16ger2nn(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5)
-!CHECK:  store <512 x i1> %7, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.xvf16ger2nn(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5)
+!LLVMIR:  store <512 x i1> %7, ptr %1, align 64
 
       subroutine test_xvf16ger2np()
       use, intrinsic :: mma
@@ -135,14 +135,14 @@ subroutine test_xvf16ger2np()
       end subroutine test_xvf16ger2np
 
 !CHECK-LABEL: @test_xvf16ger2np_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.xvf16ger2np(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5)
-!CHECK:  store <512 x i1> %7, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.xvf16ger2np(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5)
+!LLVMIR:  store <512 x i1> %7, ptr %1, align 64
 
       subroutine test_xvf16ger2pn()
       use, intrinsic :: mma
@@ -153,14 +153,14 @@ subroutine test_xvf16ger2pn()
       end subroutine test_xvf16ger2pn
 
 !CHECK-LABEL: @test_xvf16ger2pn_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.xvf16ger2pn(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5)
-!CHECK:  store <512 x i1> %7, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.xvf16ger2pn(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5)
+!LLVMIR:  store <512 x i1> %7, ptr %1, align 64
 
       subroutine test_xvf16ger2pp()
       use, intrinsic :: mma
@@ -171,14 +171,14 @@ subroutine test_xvf16ger2pp()
       end subroutine test_xvf16ger2pp
 
 !CHECK-LABEL: @test_xvf16ger2pp_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5)
-!CHECK:  store <512 x i1> %7, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5)
+!LLVMIR:  store <512 x i1> %7, ptr %1, align 64
 
       subroutine test_xvf32ger_u1()
       use, intrinsic :: mma
@@ -189,13 +189,13 @@ subroutine test_xvf32ger_u1()
       end subroutine test_xvf32ger_u1
 
 !CHECK-LABEL: @test_xvf32ger_u1_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = call <512 x i1> @llvm.ppc.mma.xvf32ger(<16 x i8> %4, <16 x i8> %5)
-!CHECK:  store <512 x i1> %6, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = call <512 x i1> @llvm.ppc.mma.xvf32ger(<16 x i8> %4, <16 x i8> %5)
+!LLVMIR:  store <512 x i1> %6, ptr %1, align 64
 
 
       subroutine test_xvf32ger_r4()
@@ -207,15 +207,15 @@ subroutine test_xvf32ger_r4()
       end subroutine test_xvf32ger_r4
 
 !CHECK-LABEL: @test_xvf32ger_r4_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <4 x float>, i64 1, align 16
-!CHECK:  %3 = alloca <4 x float>, i64 1, align 16
-!CHECK:  %4 = load <4 x float>, ptr %2, align 16
-!CHECK:  %5 = load <4 x float>, ptr %3, align 16
-!CHECK:  %6 = bitcast <4 x float> %4 to <16 x i8>
-!CHECK:  %7 = bitcast <4 x float> %5 to <16 x i8>
-!CHECK:  %8 = call <512 x i1> @llvm.ppc.mma.xvf32ger(<16 x i8> %6, <16 x i8> %7)
-!CHECK:  store <512 x i1> %8, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <4 x float>, i64 1, align 16
+!LLVMIR:  %3 = alloca <4 x float>, i64 1, align 16
+!LLVMIR:  %4 = load <4 x float>, ptr %2, align 16
+!LLVMIR:  %5 = load <4 x float>, ptr %3, align 16
+!LLVMIR:  %6 = bitcast <4 x float> %4 to <16 x i8>
+!LLVMIR:  %7 = bitcast <4 x float> %5 to <16 x i8>
+!LLVMIR:  %8 = call <512 x i1> @llvm.ppc.mma.xvf32ger(<16 x i8> %6, <16 x i8> %7)
+!LLVMIR:  store <512 x i1> %8, ptr %1, align 64
 
       subroutine test_xvf32gernn_u1()
       use, intrinsic :: mma
@@ -226,14 +226,14 @@ subroutine test_xvf32gernn_u1()
       end subroutine test_xvf32gernn_u1
 
 !CHECK-LABEL: @test_xvf32gernn_u1_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.xvf32gernn(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5)
-!CHECK:  store <512 x i1> %7, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.xvf32gernn(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5)
+!LLVMIR:  store <512 x i1> %7, ptr %1, align 64
 
       subroutine test_xvf32gernn_r4()
       use, intrinsic :: mma
@@ -244,16 +244,16 @@ subroutine test_xvf32gernn_r4()
       end subroutine test_xvf32gernn_r4
 
 !CHECK-LABEL: @test_xvf32gernn_r4_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <4 x float>, i64 1, align 16
-!CHECK:  %3 = alloca <4 x float>, i64 1, align 16
-!CHECK:  %4 = load <4 x float>, ptr %2, align 16
-!CHECK:  %5 = load <4 x float>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = bitcast <4 x float> %4 to <16 x i8>
-!CHECK:  %8 = bitcast <4 x float> %5 to <16 x i8>
-!CHECK:  %9 = call <512 x i1> @llvm.ppc.mma.xvf32gernn(<512 x i1> %6, <16 x i8> %7, <16 x i8> %8)
-!CHECK:  store <512 x i1> %9, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <4 x float>, i64 1, align 16
+!LLVMIR:  %3 = alloca <4 x float>, i64 1, align 16
+!LLVMIR:  %4 = load <4 x float>, ptr %2, align 16
+!LLVMIR:  %5 = load <4 x float>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = bitcast <4 x float> %4 to <16 x i8>
+!LLVMIR:  %8 = bitcast <4 x float> %5 to <16 x i8>
+!LLVMIR:  %9 = call <512 x i1> @llvm.ppc.mma.xvf32gernn(<512 x i1> %6, <16 x i8> %7, <16 x i8> %8)
+!LLVMIR:  store <512 x i1> %9, ptr %1, align 64
 
       subroutine test_xvf32gernp_u1()
       use, intrinsic :: mma
@@ -264,14 +264,14 @@ subroutine test_xvf32gernp_u1()
       end subroutine test_xvf32gernp_u1
 
 !CHECK-LABEL: @test_xvf32gernp_u1_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.xvf32gernp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5)
-!CHECK:  store <512 x i1> %7, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.xvf32gernp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5)
+!LLVMIR:  store <512 x i1> %7, ptr %1, align 64
 
       subroutine test_xvf32gernp_r4()
       use, intrinsic :: mma
@@ -282,16 +282,16 @@ subroutine test_xvf32gernp_r4()
       end subroutine test_xvf32gernp_r4
 
 !CHECK-LABEL: @test_xvf32gernp_r4_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <4 x float>, i64 1, align 16
-!CHECK:  %3 = alloca <4 x float>, i64 1, align 16
-!CHECK:  %4 = load <4 x float>, ptr %2, align 16
-!CHECK:  %5 = load <4 x float>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = bitcast <4 x float> %4 to <16 x i8>
-!CHECK:  %8 = bitcast <4 x float> %5 to <16 x i8>
-!CHECK:  %9 = call <512 x i1> @llvm.ppc.mma.xvf32gernp(<512 x i1> %6, <16 x i8> %7, <16 x i8> %8)
-!CHECK:  store <512 x i1> %9, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <4 x float>, i64 1, align 16
+!LLVMIR:  %3 = alloca <4 x float>, i64 1, align 16
+!LLVMIR:  %4 = load <4 x float>, ptr %2, align 16
+!LLVMIR:  %5 = load <4 x float>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = bitcast <4 x float> %4 to <16 x i8>
+!LLVMIR:  %8 = bitcast <4 x float> %5 to <16 x i8>
+!LLVMIR:  %9 = call <512 x i1> @llvm.ppc.mma.xvf32gernp(<512 x i1> %6, <16 x i8> %7, <16 x i8> %8)
+!LLVMIR:  store <512 x i1> %9, ptr %1, align 64
 
       subroutine test_xvf32gerpn_u1()
       use, intrinsic :: mma
@@ -302,14 +302,14 @@ subroutine test_xvf32gerpn_u1()
       end subroutine test_xvf32gerpn_u1
 
 !CHECK-LABEL: @test_xvf32gerpn_u1_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.xvf32gerpn(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5)
-!CHECK:  store <512 x i1> %7, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.xvf32gerpn(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5)
+!LLVMIR:  store <512 x i1> %7, ptr %1, align 64
 
       subroutine test_xvf32gerpn_r4()
       use, intrinsic :: mma
@@ -320,16 +320,16 @@ subroutine test_xvf32gerpn_r4()
       end subroutine test_xvf32gerpn_r4
 
 !CHECK-LABEL: @test_xvf32gerpn_r4_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <4 x float>, i64 1, align 16
-!CHECK:  %3 = alloca <4 x float>, i64 1, align 16
-!CHECK:  %4 = load <4 x float>, ptr %2, align 16
-!CHECK:  %5 = load <4 x float>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = bitcast <4 x float> %4 to <16 x i8>
-!CHECK:  %8 = bitcast <4 x float> %5 to <16 x i8>
-!CHECK:  %9 = call <512 x i1> @llvm.ppc.mma.xvf32gerpn(<512 x i1> %6, <16 x i8> %7, <16 x i8> %8)
-!CHECK:  store <512 x i1> %9, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <4 x float>, i64 1, align 16
+!LLVMIR:  %3 = alloca <4 x float>, i64 1, align 16
+!LLVMIR:  %4 = load <4 x float>, ptr %2, align 16
+!LLVMIR:  %5 = load <4 x float>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = bitcast <4 x float> %4 to <16 x i8>
+!LLVMIR:  %8 = bitcast <4 x float> %5 to <16 x i8>
+!LLVMIR:  %9 = call <512 x i1> @llvm.ppc.mma.xvf32gerpn(<512 x i1> %6, <16 x i8> %7, <16 x i8> %8)
+!LLVMIR:  store <512 x i1> %9, ptr %1, align 64
 
       subroutine test_xvf32gerpp_u1()
       use, intrinsic :: mma
@@ -340,14 +340,14 @@ subroutine test_xvf32gerpp_u1()
       end subroutine test_xvf32gerpp_u1
 
 !CHECK-LABEL: @test_xvf32gerpp_u1_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5)
-!CHECK:  store <512 x i1> %7, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5)
+!LLVMIR:  store <512 x i1> %7, ptr %1, align 64
 
 
       subroutine test_xvf32gerpp_r4()
@@ -359,16 +359,16 @@ subroutine test_xvf32gerpp_r4()
       end subroutine test_xvf32gerpp_r4
 
 !CHECK-LABEL: @test_xvf32gerpp_r4_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <4 x float>, i64 1, align 16
-!CHECK:  %3 = alloca <4 x float>, i64 1, align 16
-!CHECK:  %4 = load <4 x float>, ptr %2, align 16
-!CHECK:  %5 = load <4 x float>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = bitcast <4 x float> %4 to <16 x i8>
-!CHECK:  %8 = bitcast <4 x float> %5 to <16 x i8>
-!CHECK:  %9 = call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %6, <16 x i8> %7, <16 x i8> %8)
-!CHECK:  store <512 x i1> %9, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <4 x float>, i64 1, align 16
+!LLVMIR:  %3 = alloca <4 x float>, i64 1, align 16
+!LLVMIR:  %4 = load <4 x float>, ptr %2, align 16
+!LLVMIR:  %5 = load <4 x float>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = bitcast <4 x float> %4 to <16 x i8>
+!LLVMIR:  %8 = bitcast <4 x float> %5 to <16 x i8>
+!LLVMIR:  %9 = call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %6, <16 x i8> %7, <16 x i8> %8)
+!LLVMIR:  store <512 x i1> %9, ptr %1, align 64
 
       subroutine test_xvf64ger_u1()
       use, intrinsic :: mma
@@ -380,13 +380,13 @@ subroutine test_xvf64ger_u1()
       end subroutine test_xvf64ger_u1
 
 !CHECK-LABEL: @test_xvf64ger_u1_
-!CHECK:  %1 = alloca <256 x i1>, i64 1, align 32
-!CHECK:  %2 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <256 x i1>, ptr %1, align 32
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = call <512 x i1> @llvm.ppc.mma.xvf64ger(<256 x i1> %4, <16 x i8> %5)
-!CHECK:  store <512 x i1> %6, ptr %2, align 64
+!LLVMIR:  %1 = alloca <256 x i1>, i64 1, align 32
+!LLVMIR:  %2 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <256 x i1>, ptr %1, align 32
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = call <512 x i1> @llvm.ppc.mma.xvf64ger(<256 x i1> %4, <16 x i8> %5)
+!LLVMIR:  store <512 x i1> %6, ptr %2, align 64
 
       subroutine test_xvf64ger_r8()
       use, intrinsic :: mma
@@ -398,14 +398,14 @@ subroutine test_xvf64ger_r8()
       end subroutine test_xvf64ger_r8
 
 !CHECK-LABEL: @test_xvf64ger_r8_
-!CHECK:  %1 = alloca <256 x i1>, i64 1, align 32
-!CHECK:  %2 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %3 = alloca <2 x double>, i64 1, align 16
-!CHECK:  %4 = load <256 x i1>, ptr %1, align 32
-!CHECK:  %5 = load <2 x double>, ptr %3, align 16
-!CHECK:  %6 = bitcast <2 x double> %5 to <16 x i8>
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.xvf64ger(<256 x i1> %4, <16 x i8> %6)
-!CHECK:  store <512 x i1> %7, ptr %2, align 64
+!LLVMIR:  %1 = alloca <256 x i1>, i64 1, align 32
+!LLVMIR:  %2 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %3 = alloca <2 x double>, i64 1, align 16
+!LLVMIR:  %4 = load <256 x i1>, ptr %1, align 32
+!LLVMIR:  %5 = load <2 x double>, ptr %3, align 16
+!LLVMIR:  %6 = bitcast <2 x double> %5 to <16 x i8>
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.xvf64ger(<256 x i1> %4, <16 x i8> %6)
+!LLVMIR:  store <512 x i1> %7, ptr %2, align 64
 
 
       subroutine test_xvf64gernn_u1()
@@ -418,14 +418,14 @@ subroutine test_xvf64gernn_u1()
       end subroutine test_xvf64gernn_u1
 
 !CHECK-LABEL: @test_xvf64gernn_u1_
-!CHECK:  %1 = alloca <256 x i1>, i64 1, align 32
-!CHECK:  %2 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <256 x i1>, ptr %1, align 32
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %2, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.xvf64gernn(<512 x i1> %6, <256 x i1> %4, <16 x i8> %5)
-!CHECK:  store <512 x i1> %7, ptr %2, align 64
+!LLVMIR:  %1 = alloca <256 x i1>, i64 1, align 32
+!LLVMIR:  %2 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <256 x i1>, ptr %1, align 32
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %2, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.xvf64gernn(<512 x i1> %6, <256 x i1> %4, <16 x i8> %5)
+!LLVMIR:  store <512 x i1> %7, ptr %2, align 64
 
 
       subroutine test_xvf64gernn_r8()
@@ -438,15 +438,15 @@ subroutine test_xvf64gernn_r8()
       end subroutine test_xvf64gernn_r8
 
 !CHECK-LABEL: @test_xvf64gernn_r8_
-!CHECK:  %1 = alloca <256 x i1>, i64 1, align 32
-!CHECK:  %2 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %3 = alloca <2 x double>, i64 1, align 16
-!CHECK:  %4 = load <256 x i1>, ptr %1, align 32
-!CHECK:  %5 = load <2 x double>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %2, align 64
-!CHECK:  %7 = bitcast <2 x double> %5 to <16 x i8>
-!CHECK:  %8 = call <512 x i1> @llvm.ppc.mma.xvf64gernn(<512 x i1> %6, <256 x i1> %4, <16 x i8> %7)
-!CHECK:  store <512 x i1> %8, ptr %2, align 64
+!LLVMIR:  %1 = alloca <256 x i1>, i64 1, align 32
+!LLVMIR:  %2 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %3 = alloca <2 x double>, i64 1, align 16
+!LLVMIR:  %4 = load <256 x i1>, ptr %1, align 32
+!LLVMIR:  %5 = load <2 x double>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %2, align 64
+!LLVMIR:  %7 = bitcast <2 x double> %5 to <16 x i8>
+!LLVMIR:  %8 = call <512 x i1> @llvm.ppc.mma.xvf64gernn(<512 x i1> %6, <256 x i1> %4, <16 x i8> %7)
+!LLVMIR:  store <512 x i1> %8, ptr %2, align 64
 
       subroutine test_xvf64gernp_u1()
       use, intrinsic :: mma
@@ -458,14 +458,14 @@ subroutine test_xvf64gernp_u1()
       end subroutine test_xvf64gernp_u1
 
 !CHECK-LABEL: @test_xvf64gernp_u1_
-!CHECK:  %1 = alloca <256 x i1>, i64 1, align 32
-!CHECK:  %2 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <256 x i1>, ptr %1, align 32
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %2, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> %6, <256 x i1> %4, <16 x i8> %5)
-!CHECK:  store <512 x i1> %7, ptr %2, align 64
+!LLVMIR:  %1 = alloca <256 x i1>, i64 1, align 32
+!LLVMIR:  %2 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <256 x i1>, ptr %1, align 32
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %2, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> %6, <256 x i1> %4, <16 x i8> %5)
+!LLVMIR:  store <512 x i1> %7, ptr %2, align 64
 
       subroutine test_xvf64gernp_r8()
       use, intrinsic :: mma
@@ -477,14 +477,14 @@ subroutine test_xvf64gernp_r8()
       end subroutine test_xvf64gernp_r8
 
 !CHECK-LABEL: @test_xvf64gernp_r8_
-!CHECK:  %1 = alloca <256 x i1>, i64 1, align 32
-!CHECK:  %2 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <256 x i1>, ptr %1, align 32
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %2, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> %6, <256 x i1> %4, <16 x i8> %5)
-!CHECK:  store <512 x i1> %7, ptr %2, align 64
+!LLVMIR:  %1 = alloca <256 x i1>, i64 1, align 32
+!LLVMIR:  %2 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <256 x i1>, ptr %1, align 32
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %2, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> %6, <256 x i1> %4, <16 x i8> %5)
+!LLVMIR:  store <512 x i1> %7, ptr %2, align 64
 
       subroutine test_xvf64gerpn_u1()
       use, intrinsic :: mma
@@ -496,14 +496,14 @@ subroutine test_xvf64gerpn_u1()
       end subroutine test_xvf64gerpn_u1
 
 !CHECK-LABEL: @test_xvf64gerpn_u1_
-!CHECK:  %1 = alloca <256 x i1>, i64 1, align 32
-!CHECK:  %2 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <256 x i1>, ptr %1, align 32
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %2, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.xvf64gerpn(<512 x i1> %6, <256 x i1> %4, <16 x i8> %5)
-!CHECK:  store <512 x i1> %7, ptr %2, align 64
+!LLVMIR:  %1 = alloca <256 x i1>, i64 1, align 32
+!LLVMIR:  %2 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <256 x i1>, ptr %1, align 32
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %2, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.xvf64gerpn(<512 x i1> %6, <256 x i1> %4, <16 x i8> %5)
+!LLVMIR:  store <512 x i1> %7, ptr %2, align 64
 
       subroutine test_xvf64gerpn_r8()
       use, intrinsic :: mma
@@ -515,15 +515,15 @@ subroutine test_xvf64gerpn_r8()
       end subroutine test_xvf64gerpn_r8
 
 !CHECK-LABEL: @test_xvf64gerpn_r8_
-!CHECK:  %1 = alloca <256 x i1>, i64 1, align 32
-!CHECK:  %2 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %3 = alloca <2 x double>, i64 1, align 16
-!CHECK:  %4 = load <256 x i1>, ptr %1, align 32
-!CHECK:  %5 = load <2 x double>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %2, align 64
-!CHECK:  %7 = bitcast <2 x double> %5 to <16 x i8>
-!CHECK:  %8 = call <512 x i1> @llvm.ppc.mma.xvf64gerpn(<512 x i1> %6, <256 x i1> %4, <16 x i8> %7)
-!CHECK:  store <512 x i1> %8, ptr %2, align 64
+!LLVMIR:  %1 = alloca <256 x i1>, i64 1, align 32
+!LLVMIR:  %2 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %3 = alloca <2 x double>, i64 1, align 16
+!LLVMIR:  %4 = load <256 x i1>, ptr %1, align 32
+!LLVMIR:  %5 = load <2 x double>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %2, align 64
+!LLVMIR:  %7 = bitcast <2 x double> %5 to <16 x i8>
+!LLVMIR:  %8 = call <512 x i1> @llvm.ppc.mma.xvf64gerpn(<512 x i1> %6, <256 x i1> %4, <16 x i8> %7)
+!LLVMIR:  store <512 x i1> %8, ptr %2, align 64
 
       subroutine test_xvf64gerpp_u1()
       use, intrinsic :: mma
@@ -535,14 +535,14 @@ subroutine test_xvf64gerpp_u1()
       end subroutine test_xvf64gerpp_u1
 
 !CHECK-LABEL: @test_xvf64gerpp_u1_
-!CHECK:  %1 = alloca <256 x i1>, i64 1, align 32
-!CHECK:  %2 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <256 x i1>, ptr %1, align 32
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %2, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %6, <256 x i1> %4, <16 x i8> %5)
-!CHECK:  store <512 x i1> %7, ptr %2, align 64
+!LLVMIR:  %1 = alloca <256 x i1>, i64 1, align 32
+!LLVMIR:  %2 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <256 x i1>, ptr %1, align 32
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %2, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %6, <256 x i1> %4, <16 x i8> %5)
+!LLVMIR:  store <512 x i1> %7, ptr %2, align 64
 
 
       subroutine test_xvf64gerpp_r8()
@@ -555,15 +555,15 @@ subroutine test_xvf64gerpp_r8()
       end subroutine test_xvf64gerpp_r8
 
 !CHECK-LABEL: @test_xvf64gerpp_r8_
-!CHECK:  %1 = alloca <256 x i1>, i64 1, align 32
-!CHECK:  %2 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %3 = alloca <2 x double>, i64 1, align 16
-!CHECK:  %4 = load <256 x i1>, ptr %1, align 32
-!CHECK:  %5 = load <2 x double>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %2, align 64
-!CHECK:  %7 = bitcast <2 x double> %5 to <16 x i8>
-!CHECK:  %8 = call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %6, <256 x i1> %4, <16 x i8> %7)
-!CHECK:  store <512 x i1> %8, ptr %2, align 64
+!LLVMIR:  %1 = alloca <256 x i1>, i64 1, align 32
+!LLVMIR:  %2 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %3 = alloca <2 x double>, i64 1, align 16
+!LLVMIR:  %4 = load <256 x i1>, ptr %1, align 32
+!LLVMIR:  %5 = load <2 x double>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %2, align 64
+!LLVMIR:  %7 = bitcast <2 x double> %5 to <16 x i8>
+!LLVMIR:  %8 = call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %6, <256 x i1> %4, <16 x i8> %7)
+!LLVMIR:  store <512 x i1> %8, ptr %2, align 64
 
       subroutine test_xvi16ger2_u1()
       use, intrinsic :: mma
@@ -574,13 +574,13 @@ subroutine test_xvi16ger2_u1()
       end subroutine test_xvi16ger2_u1
 
 !CHECK-LABEL: @test_xvi16ger2_u1_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = call <512 x i1> @llvm.ppc.mma.xvi16ger2(<16 x i8> %4, <16 x i8> %5)
-!CHECK:  store <512 x i1> %6, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = call <512 x i1> @llvm.ppc.mma.xvi16ger2(<16 x i8> %4, <16 x i8> %5)
+!LLVMIR:  store <512 x i1> %6, ptr %1, align 64
 
       subroutine test_xvi16ger2_i2()
       use, intrinsic :: mma
@@ -591,15 +591,15 @@ subroutine test_xvi16ger2_i2()
       end subroutine test_xvi16ger2_i2
 
 !CHECK-LABEL: @test_xvi16ger2_i2_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <8 x i16>, i64 1, align 16
-!CHECK:  %3 = alloca <8 x i16>, i64 1, align 16
-!CHECK:  %4 = load <8 x i16>, ptr %2, align 16
-!CHECK:  %5 = load <8 x i16>, ptr %3, align 16
-!CHECK:  %6 = bitcast <8 x i16> %4 to <16 x i8>
-!CHECK:  %7 = bitcast <8 x i16> %5 to <16 x i8>
-!CHECK:  %8 = call <512 x i1> @llvm.ppc.mma.xvi16ger2(<16 x i8> %6, <16 x i8> %7)
-!CHECK:  store <512 x i1> %8, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <8 x i16>, i64 1, align 16
+!LLVMIR:  %3 = alloca <8 x i16>, i64 1, align 16
+!LLVMIR:  %4 = load <8 x i16>, ptr %2, align 16
+!LLVMIR:  %5 = load <8 x i16>, ptr %3, align 16
+!LLVMIR:  %6 = bitcast <8 x i16> %4 to <16 x i8>
+!LLVMIR:  %7 = bitcast <8 x i16> %5 to <16 x i8>
+!LLVMIR:  %8 = call <512 x i1> @llvm.ppc.mma.xvi16ger2(<16 x i8> %6, <16 x i8> %7)
+!LLVMIR:  store <512 x i1> %8, ptr %1, align 64
 
       subroutine test_xvi16ger2pp_u1()
       use, intrinsic :: mma
@@ -610,14 +610,14 @@ subroutine test_xvi16ger2pp_u1()
       end subroutine test_xvi16ger2pp_u1
 
 !CHECK-LABEL: @test_xvi16ger2pp_u1_
-!CHECK:   %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:   %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:   %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:   %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:   %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:   %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:   %7 = call <512 x i1> @llvm.ppc.mma.xvi16ger2pp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5)
-!CHECK:   store <512 x i1> %7, ptr %1, align 64
+!LLVMIR:   %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:   %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:   %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:   %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:   %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:   %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:   %7 = call <512 x i1> @llvm.ppc.mma.xvi16ger2pp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5)
+!LLVMIR:   store <512 x i1> %7, ptr %1, align 64
 
       subroutine test_xvi16ger2pp_i2()
       use, intrinsic :: mma
@@ -628,16 +628,16 @@ subroutine test_xvi16ger2pp_i2()
       end subroutine test_xvi16ger2pp_i2
 
 !CHECK-LABEL: @test_xvi16ger2pp_i2_
-!CHECK:   %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:   %2 = alloca <8 x i16>, i64 1, align 16
-!CHECK:   %3 = alloca <8 x i16>, i64 1, align 16
-!CHECK:   %4 = load <8 x i16>, ptr %2, align 16
-!CHECK:   %5 = load <8 x i16>, ptr %3, align 16
-!CHECK:   %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:   %7 = bitcast <8 x i16> %4 to <16 x i8>
-!CHECK:   %8 = bitcast <8 x i16> %5 to <16 x i8>
-!CHECK:   %9 = call <512 x i1> @llvm.ppc.mma.xvi16ger2pp(<512 x i1> %6, <16 x i8> %7, <16 x i8> %8)
-!CHECK:   store <512 x i1> %9, ptr %1, align 64
+!LLVMIR:   %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:   %2 = alloca <8 x i16>, i64 1, align 16
+!LLVMIR:   %3 = alloca <8 x i16>, i64 1, align 16
+!LLVMIR:   %4 = load <8 x i16>, ptr %2, align 16
+!LLVMIR:   %5 = load <8 x i16>, ptr %3, align 16
+!LLVMIR:   %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:   %7 = bitcast <8 x i16> %4 to <16 x i8>
+!LLVMIR:   %8 = bitcast <8 x i16> %5 to <16 x i8>
+!LLVMIR:   %9 = call <512 x i1> @llvm.ppc.mma.xvi16ger2pp(<512 x i1> %6, <16 x i8> %7, <16 x i8> %8)
+!LLVMIR:   store <512 x i1> %9, ptr %1, align 64
 
       subroutine test_xvi16ger2s_u1()
       use, intrinsic :: mma
@@ -648,13 +648,13 @@ subroutine test_xvi16ger2s_u1()
       end subroutine test_xvi16ger2s_u1
 
 !CHECK-LABEL:  @test_xvi16ger2s_u1_
-!CHECK:   %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:   %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:   %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:   %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:   %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:   %6 = call <512 x i1> @llvm.ppc.mma.xvi16ger2s(<16 x i8> %4, <16 x i8> %5)
-!CHECK:   store <512 x i1> %6, ptr %1, align 64
+!LLVMIR:   %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:   %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:   %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:   %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:   %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:   %6 = call <512 x i1> @llvm.ppc.mma.xvi16ger2s(<16 x i8> %4, <16 x i8> %5)
+!LLVMIR:   store <512 x i1> %6, ptr %1, align 64
 
       subroutine test_xvi16ger2s_i2()
       use, intrinsic :: mma
@@ -665,15 +665,15 @@ subroutine test_xvi16ger2s_i2()
       end subroutine test_xvi16ger2s_i2
 
 !CHECK-LABEL:  @test_xvi16ger2s_i2_
-!CHECK:   %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:   %2 = alloca <8 x i16>, i64 1, align 16
-!CHECK:   %3 = alloca <8 x i16>, i64 1, align 16
-!CHECK:   %4 = load <8 x i16>, ptr %2, align 16
-!CHECK:   %5 = load <8 x i16>, ptr %3, align 16
-!CHECK:   %6 = bitcast <8 x i16> %4 to <16 x i8>
-!CHECK:   %7 = bitcast <8 x i16> %5 to <16 x i8>
-!CHECK:   %8 = call <512 x i1> @llvm.ppc.mma.xvi16ger2s(<16 x i8> %6, <16 x i8> %7)
-!CHECK:   store <512 x i1> %8, ptr %1, align 64
+!LLVMIR:   %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:   %2 = alloca <8 x i16>, i64 1, align 16
+!LLVMIR:   %3 = alloca <8 x i16>, i64 1, align 16
+!LLVMIR:   %4 = load <8 x i16>, ptr %2, align 16
+!LLVMIR:   %5 = load <8 x i16>, ptr %3, align 16
+!LLVMIR:   %6 = bitcast <8 x i16> %4 to <16 x i8>
+!LLVMIR:   %7 = bitcast <8 x i16> %5 to <16 x i8>
+!LLVMIR:   %8 = call <512 x i1> @llvm.ppc.mma.xvi16ger2s(<16 x i8> %6, <16 x i8> %7)
+!LLVMIR:   store <512 x i1> %8, ptr %1, align 64
 
       subroutine test_xvi16ger2spp_u1()
       use, intrinsic :: mma
@@ -684,14 +684,14 @@ subroutine test_xvi16ger2spp_u1()
       end subroutine test_xvi16ger2spp_u1
 
 !CHECK-LABEL:  @test_xvi16ger2spp_u1_
-!CHECK:   %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:   %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:   %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:   %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:   %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:   %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:   %7 = call <512 x i1> @llvm.ppc.mma.xvi16ger2spp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5)
-!CHECK:   store <512 x i1> %7, ptr %1, align 64
+!LLVMIR:   %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:   %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:   %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:   %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:   %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:   %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:   %7 = call <512 x i1> @llvm.ppc.mma.xvi16ger2spp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5)
+!LLVMIR:   store <512 x i1> %7, ptr %1, align 64
 
       subroutine test_xvi16ger2spp_i2()
       use, intrinsic :: mma
@@ -702,16 +702,16 @@ subroutine test_xvi16ger2spp_i2()
       end subroutine test_xvi16ger2spp_i2
 
 !CHECK-LABEL:  @test_xvi16ger2spp_i2_
-!CHECK:   %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:   %2 = alloca <8 x i16>, i64 1, align 16
-!CHECK:   %3 = alloca <8 x i16>, i64 1, align 16
-!CHECK:   %4 = load <8 x i16>, ptr %2, align 16
-!CHECK:   %5 = load <8 x i16>, ptr %3, align 16
-!CHECK:   %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:   %7 = bitcast <8 x i16> %4 to <16 x i8>
-!CHECK:   %8 = bitcast <8 x i16> %5 to <16 x i8>
-!CHECK:   %9 = call <512 x i1> @llvm.ppc.mma.xvi16ger2spp(<512 x i1> %6, <16 x i8> %7, <16 x i8> %8)
-!CHECK:   store <512 x i1> %9, ptr %1, align 64
+!LLVMIR:   %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:   %2 = alloca <8 x i16>, i64 1, align 16
+!LLVMIR:   %3 = alloca <8 x i16>, i64 1, align 16
+!LLVMIR:   %4 = load <8 x i16>, ptr %2, align 16
+!LLVMIR:   %5 = load <8 x i16>, ptr %3, align 16
+!LLVMIR:   %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:   %7 = bitcast <8 x i16> %4 to <16 x i8>
+!LLVMIR:   %8 = bitcast <8 x i16> %5 to <16 x i8>
+!LLVMIR:   %9 = call <512 x i1> @llvm.ppc.mma.xvi16ger2spp(<512 x i1> %6, <16 x i8> %7, <16 x i8> %8)
+!LLVMIR:   store <512 x i1> %9, ptr %1, align 64
 
       subroutine test_xvi4ger8()
       use, intrinsic :: mma
@@ -722,13 +722,13 @@ subroutine test_xvi4ger8()
       end subroutine test_xvi4ger8
 
 !CHECK-LABEL:  @test_xvi4ger8_
-!CHECK:   %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:   %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:   %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:   %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:   %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:   %6 = call <512 x i1> @llvm.ppc.mma.xvi4ger8(<16 x i8> %4, <16 x i8> %5)
-!CHECK:   store <512 x i1> %6, ptr %1, align 64
+!LLVMIR:   %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:   %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:   %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:   %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:   %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:   %6 = call <512 x i1> @llvm.ppc.mma.xvi4ger8(<16 x i8> %4, <16 x i8> %5)
+!LLVMIR:   store <512 x i1> %6, ptr %1, align 64
 
       subroutine test_xvi4ger8pp()
       use, intrinsic :: mma
@@ -739,14 +739,14 @@ subroutine test_xvi4ger8pp()
       end subroutine test_xvi4ger8pp
 
 !CHECK-LABEL:  @test_xvi4ger8pp_
-!CHECK:   %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:   %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:   %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:   %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:   %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:   %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:   %7 = call <512 x i1> @llvm.ppc.mma.xvi4ger8pp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5)
-!CHECK:   store <512 x i1> %7, ptr %1, align 64
+!LLVMIR:   %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:   %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:   %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:   %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:   %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:   %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:   %7 = call <512 x i1> @llvm.ppc.mma.xvi4ger8pp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5)
+!LLVMIR:   store <512 x i1> %7, ptr %1, align 64
 
       subroutine test_xvi8ger4_u1()
       use, intrinsic :: mma
@@ -757,13 +757,13 @@ subroutine test_xvi8ger4_u1()
       end subroutine test_xvi8ger4_u1
 
 !CHECK-LABEL: @test_xvi8ger4_u1_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = call <512 x i1> @llvm.ppc.mma.xvi8ger4(<16 x i8> %4, <16 x i8> %5)
-!CHECK:  store <512 x i1> %6, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = call <512 x i1> @llvm.ppc.mma.xvi8ger4(<16 x i8> %4, <16 x i8> %5)
+!LLVMIR:  store <512 x i1> %6, ptr %1, align 64
 
 
       subroutine test_xvi8ger4_i1()
@@ -775,13 +775,13 @@ subroutine test_xvi8ger4_i1()
       end subroutine test_xvi8ger4_i1
 
 !CHECK-LABEL: @test_xvi8ger4_i1_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = call <512 x i1> @llvm.ppc.mma.xvi8ger4(<16 x i8> %4, <16 x i8> %5)
-!CHECK:  store <512 x i1> %6, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = call <512 x i1> @llvm.ppc.mma.xvi8ger4(<16 x i8> %4, <16 x i8> %5)
+!LLVMIR:  store <512 x i1> %6, ptr %1, align 64
 
       subroutine test_xvi8ger4pp_u1()
       use, intrinsic :: mma
@@ -792,14 +792,14 @@ subroutine test_xvi8ger4pp_u1()
       end subroutine test_xvi8ger4pp_u1
 
 !CHECK-LABEL: @test_xvi8ger4pp_u1_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.xvi8ger4pp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5)
-!CHECK:  store <512 x i1> %7, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.xvi8ger4pp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5)
+!LLVMIR:  store <512 x i1> %7, ptr %1, align 64
 
       subroutine test_xvi8ger4pp_i1()
       use, intrinsic :: mma
@@ -810,14 +810,14 @@ subroutine test_xvi8ger4pp_i1()
       end subroutine test_xvi8ger4pp_i1
 
 !CHECK-LABEL:  @test_xvi8ger4pp_i1_
-!CHECK:   %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:   %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:   %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:   %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:   %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:   %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:   %7 = call <512 x i1> @llvm.ppc.mma.xvi8ger4pp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5)
-!CHECK:   store <512 x i1> %7, ptr %1, align 64
+!LLVMIR:   %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:   %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:   %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:   %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:   %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:   %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:   %7 = call <512 x i1> @llvm.ppc.mma.xvi8ger4pp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5)
+!LLVMIR:   store <512 x i1> %7, ptr %1, align 64
 
       subroutine test_xvi8ger4spp_u1()
       use, intrinsic :: mma
@@ -828,14 +828,14 @@ subroutine test_xvi8ger4spp_u1()
       end subroutine test_xvi8ger4spp_u1
 
 !CHECK-LABEL: @test_xvi8ger4spp_u1_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.xvi8ger4spp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5)
-!CHECK:  store <512 x i1> %7, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.xvi8ger4spp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5)
+!LLVMIR:  store <512 x i1> %7, ptr %1, align 64
 
       subroutine test_xvi8ger4spp_i1()
       use, intrinsic :: mma
@@ -846,11 +846,11 @@ subroutine test_xvi8ger4spp_i1()
       end subroutine test_xvi8ger4spp_i1
 
 !CHECK-LABEL: @test_xvi8ger4spp_i1_
-!CHECK:  %1 = alloca <512 x i1>, i64 1, align 64
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %4 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %5 = load <16 x i8>, ptr %3, align 16
-!CHECK:  %6 = load <512 x i1>, ptr %1, align 64
-!CHECK:  %7 = call <512 x i1> @llvm.ppc.mma.xvi8ger4spp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5)
-!CHECK:  store <512 x i1> %7, ptr %1, align 64
+!LLVMIR:  %1 = alloca <512 x i1>, i64 1, align 64
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %4 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %5 = load <16 x i8>, ptr %3, align 16
+!LLVMIR:  %6 = load <512 x i1>, ptr %1, align 64
+!LLVMIR:  %7 = call <512 x i1> @llvm.ppc.mma.xvi8ger4spp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5)
+!LLVMIR:  store <512 x i1> %7, ptr %1, align 64

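The ppc-pwr10-vec-intrinsics.f90 changes that follow apply the same prefix switch to the VSX paired-vector tests. As a hedged sketch of one such test (argument names illustrative; the call interface is assumed from vec_lxvp_test_i2, whose body is elided in the hunks below, while the LLVMIR lines mirror the diff's expectations):

! RUN: %flang_fc1 -flang-experimental-hlfir -triple powerpc64le-unknown-unknown -target-cpu pwr10 -emit-llvm %s -o - | FileCheck --check-prefixes="LLVMIR" %s
! REQUIRES: target=powerpc{{.*}}

      subroutine vec_lxvp_sketch(v1, offset, vp)
      integer(2) :: offset
      vector(integer(2)) :: v1     ! base address of the paired load
      __vector_pair :: vp          ! 256-bit result
      vp = vec_lxvp(offset, v1)    ! assumed interface: (offset, base)
      end subroutine vec_lxvp_sketch

!CHECK-LABEL: @vec_lxvp_sketch_
!LLVMIR:  %[[offset:.*]] = load i16, ptr %1, align 2
!LLVMIR:  %[[addr:.*]] = getelementptr i8, ptr %0, i16 %[[offset]]
!LLVMIR:  %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]])
!LLVMIR:  store <256 x i1> %[[call]], ptr %2, align 32
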
diff --git a/flang/test/Lower/PowerPC/ppc-pwr10-vec-intrinsics.f90 b/flang/test/Lower/PowerPC/ppc-pwr10-vec-intrinsics.f90
index 07e8795b4bc7839..c49f6f06c60ed0d 100644
--- a/flang/test/Lower/PowerPC/ppc-pwr10-vec-intrinsics.f90
+++ b/flang/test/Lower/PowerPC/ppc-pwr10-vec-intrinsics.f90
@@ -1,4 +1,4 @@
-! RUN: %flang_fc1 -triple powerpc64le-unknown-unknown -target-cpu pwr10 -emit-llvm %s -o - | FileCheck --check-prefixes="CHECK" %s
+! RUN: %flang_fc1 -flang-experimental-hlfir -triple powerpc64le-unknown-unknown -target-cpu pwr10 -emit-llvm %s -o - | FileCheck --check-prefixes="LLVMIR" %s
 ! REQUIRES: target=powerpc{{.*}}
 
 !----------------------
@@ -14,10 +14,10 @@ subroutine mma_lxvp_test_i2(v1, offset, vp)
       end subroutine mma_lxvp_test_i2
 
 !CHECK-LABEL: @mma_lxvp_test_i2_
-!CHECK:  %[[offset:.*]] = load i16, ptr %1, align 2
-!CHECK:  %[[addr:.*]] = getelementptr i8, ptr %0, i16 %[[offset]]
-!CHECK:  %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]])
-!CHECK:  store <256 x i1> %[[call]], ptr %2, align 32
+!LLVMIR:  %[[offset:.*]] = load i16, ptr %1, align 2
+!LLVMIR:  %[[addr:.*]] = getelementptr i8, ptr %0, i16 %[[offset]]
+!LLVMIR:  %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]])
+!LLVMIR:  store <256 x i1> %[[call]], ptr %2, align 32
 
       subroutine test_cvspbf16()
       implicit none
@@ -26,11 +26,11 @@ subroutine test_cvspbf16()
       end subroutine test_cvspbf16
 
 !CHECK-LABEL: @test_cvspbf16_
-!CHECK:  %1 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %4 = call <16 x i8> @llvm.ppc.vsx.xvcvspbf16(<16 x i8> %3)
-!CHECK:  store <16 x i8> %4, ptr %1, align 16
+!LLVMIR:  %1 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %4 = call <16 x i8> @llvm.ppc.vsx.xvcvspbf16(<16 x i8> %3)
+!LLVMIR:  store <16 x i8> %4, ptr %1, align 16
 
       subroutine test_cvbf16spn()
       implicit none
@@ -39,11 +39,11 @@ subroutine test_cvbf16spn()
       end subroutine test_cvbf16spn
 
 !CHECK-LABEL: @test_cvbf16spn_
-!CHECK:  %1 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %2 = alloca <16 x i8>, i64 1, align 16
-!CHECK:  %3 = load <16 x i8>, ptr %2, align 16
-!CHECK:  %4 = call <16 x i8> @llvm.ppc.vsx.xvcvbf16spn(<16 x i8> %3)
-!CHECK:  store <16 x i8> %4, ptr %1, align 16
+!LLVMIR:  %1 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %2 = alloca <16 x i8>, i64 1, align 16
+!LLVMIR:  %3 = load <16 x i8>, ptr %2, align 16
+!LLVMIR:  %4 = call <16 x i8> @llvm.ppc.vsx.xvcvbf16spn(<16 x i8> %3)
+!LLVMIR:  store <16 x i8> %4, ptr %1, align 16
 
 !----------------------
 ! vec_lxvp
@@ -57,10 +57,10 @@ subroutine vec_lxvp_test_i2(v1, offset, vp)
       end subroutine vec_lxvp_test_i2
 
 !CHECK-LABEL: @vec_lxvp_test_i2_
-!CHECK:  %[[offset:.*]] = load i16, ptr %1, align 2
-!CHECK:  %[[addr:.*]] = getelementptr i8, ptr %0, i16 %[[offset]]
-!CHECK:  %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]])
-!CHECK:  store <256 x i1> %[[call]], ptr %2, align 32
+!LLVMIR:  %[[offset:.*]] = load i16, ptr %1, align 2
+!LLVMIR:  %[[addr:.*]] = getelementptr i8, ptr %0, i16 %[[offset]]
+!LLVMIR:  %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]])
+!LLVMIR:  store <256 x i1> %[[call]], ptr %2, align 32
 
       subroutine vec_lxvp_test_i4(v1, offset, vp)
       integer(2) :: offset
@@ -70,10 +70,10 @@ subroutine vec_lxvp_test_i4(v1, offset, vp)
       end subroutine vec_lxvp_test_i4
 
 !CHECK-LABEL: @vec_lxvp_test_i4_
-!CHECK:  %[[offset:.*]] = load i16, ptr %1, align 2
-!CHECK:  %[[addr:.*]] = getelementptr i8, ptr %0, i16 %[[offset]]
-!CHECK:  %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]])
-!CHECK:  store <256 x i1> %[[call]], ptr %2, align 32
+!LLVMIR:  %[[offset:.*]] = load i16, ptr %1, align 2
+!LLVMIR:  %[[addr:.*]] = getelementptr i8, ptr %0, i16 %[[offset]]
+!LLVMIR:  %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]])
+!LLVMIR:  store <256 x i1> %[[call]], ptr %2, align 32
 
       subroutine vec_lxvp_test_u2(v1, offset, vp)
       integer(2) :: offset
@@ -83,10 +83,10 @@ subroutine vec_lxvp_test_u2(v1, offset, vp)
       end subroutine vec_lxvp_test_u2
 
 !CHECK-LABEL: @vec_lxvp_test_u2_
-!CHECK:  %[[offset:.*]] = load i16, ptr %1, align 2
-!CHECK:  %[[addr:.*]] = getelementptr i8, ptr %0, i16 %[[offset]]
-!CHECK:  %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]])
-!CHECK:  store <256 x i1> %[[call]], ptr %2, align 32
+!LLVMIR:  %[[offset:.*]] = load i16, ptr %1, align 2
+!LLVMIR:  %[[addr:.*]] = getelementptr i8, ptr %0, i16 %[[offset]]
+!LLVMIR:  %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]])
+!LLVMIR:  store <256 x i1> %[[call]], ptr %2, align 32
 
       subroutine vec_lxvp_test_u4(v1, offset, vp)
       integer(2) :: offset
@@ -96,10 +96,10 @@ subroutine vec_lxvp_test_u4(v1, offset, vp)
       end subroutine vec_lxvp_test_u4
 
 !CHECK-LABEL: @vec_lxvp_test_u4_
-!CHECK:  %[[offset:.*]] = load i16, ptr %1, align 2
-!CHECK:  %[[addr:.*]] = getelementptr i8, ptr %0, i16 %[[offset]]
-!CHECK:  %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]])
-!CHECK:  store <256 x i1> %[[call]], ptr %2, align 32
+!LLVMIR:  %[[offset:.*]] = load i16, ptr %1, align 2
+!LLVMIR:  %[[addr:.*]] = getelementptr i8, ptr %0, i16 %[[offset]]
+!LLVMIR:  %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]])
+!LLVMIR:  store <256 x i1> %[[call]], ptr %2, align 32
 
       subroutine vec_lxvp_test_r4(v1, offset, vp)
       integer(2) :: offset
@@ -109,10 +109,10 @@ subroutine vec_lxvp_test_r4(v1, offset, vp)
       end subroutine vec_lxvp_test_r4
 
 !CHECK-LABEL: @vec_lxvp_test_r4_
-!CHECK:  %[[offset:.*]] = load i16, ptr %1, align 2
-!CHECK:  %[[addr:.*]] = getelementptr i8, ptr %0, i16 %[[offset]]
-!CHECK:  %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]])
-!CHECK:  store <256 x i1> %[[call]], ptr %2, align 32
+!LLVMIR:  %[[offset:.*]] = load i16, ptr %1, align 2
+!LLVMIR:  %[[addr:.*]] = getelementptr i8, ptr %0, i16 %[[offset]]
+!LLVMIR:  %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]])
+!LLVMIR:  store <256 x i1> %[[call]], ptr %2, align 32
 
       subroutine vec_lxvp_test_r8(v1, offset, vp)
       integer(2) :: offset
@@ -122,10 +122,10 @@ subroutine vec_lxvp_test_r8(v1, offset, vp)
       end subroutine vec_lxvp_test_r8
 
 !CHECK-LABEL: @vec_lxvp_test_r8_
-!CHECK:  %[[offset:.*]] = load i16, ptr %1, align 2
-!CHECK:  %[[addr:.*]] = getelementptr i8, ptr %0, i16 %[[offset]]
-!CHECK:  %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]])
-!CHECK:  store <256 x i1> %[[call]], ptr %2, align 32
+!LLVMIR:  %[[offset:.*]] = load i16, ptr %1, align 2
+!LLVMIR:  %[[addr:.*]] = getelementptr i8, ptr %0, i16 %[[offset]]
+!LLVMIR:  %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]])
+!LLVMIR:  store <256 x i1> %[[call]], ptr %2, align 32
 
       subroutine vec_lxvp_test_vp(v1, offset, vp)
       integer(2) :: offset
@@ -135,10 +135,10 @@ subroutine vec_lxvp_test_vp(v1, offset, vp)
       end subroutine vec_lxvp_test_vp
 
 !CHECK-LABEL: @vec_lxvp_test_vp_
-!CHECK:  %[[offset:.*]] = load i16, ptr %1, align 2
-!CHECK:  %[[addr:.*]] = getelementptr i8, ptr %0, i16 %[[offset]]
-!CHECK:  %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]])
-!CHECK:  store <256 x i1> %[[call]], ptr %2, align 32
+!LLVMIR:  %[[offset:.*]] = load i16, ptr %1, align 2
+!LLVMIR:  %[[addr:.*]] = getelementptr i8, ptr %0, i16 %[[offset]]
+!LLVMIR:  %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]])
+!LLVMIR:  store <256 x i1> %[[call]], ptr %2, align 32
 
       subroutine vec_lxvp_test_i2_arr(v1, offset, vp)
       integer :: offset
@@ -148,10 +148,10 @@ subroutine vec_lxvp_test_i2_arr(v1, offset, vp)
       end subroutine vec_lxvp_test_i2_arr
 
 !CHECK-LABEL: @vec_lxvp_test_i2_arr_
-!CHECK:  %[[offset:.*]] = load i32, ptr %1, align 4
-!CHECK:  %[[addr:.*]] = getelementptr i8, ptr %0, i32 %[[offset]]
-!CHECK:  %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]])
-!CHECK:  store <256 x i1> %[[call]], ptr %2, align 32
+!LLVMIR:  %[[offset:.*]] = load i32, ptr %1, align 4
+!LLVMIR:  %[[addr:.*]] = getelementptr i8, ptr %0, i32 %[[offset]]
+!LLVMIR:  %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]])
+!LLVMIR:  store <256 x i1> %[[call]], ptr %2, align 32
 
       subroutine vec_lxvp_test_i4_arr(v1, offset, vp)
       integer :: offset
@@ -161,10 +161,10 @@ subroutine vec_lxvp_test_i4_arr(v1, offset, vp)
       end subroutine vec_lxvp_test_i4_arr
 
 !CHECK-LABEL: @vec_lxvp_test_i4_arr_
-!CHECK:  %[[offset:.*]] = load i32, ptr %1, align 4
-!CHECK:  %[[addr:.*]] = getelementptr i8, ptr %0, i32 %[[offset]]
-!CHECK:  %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]])
-!CHECK:  store <256 x i1> %[[call]], ptr %2, align 32
+!LLVMIR:  %[[offset:.*]] = load i32, ptr %1, align 4
+!LLVMIR:  %[[addr:.*]] = getelementptr i8, ptr %0, i32 %[[offset]]
+!LLVMIR:  %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]])
+!LLVMIR:  store <256 x i1> %[[call]], ptr %2, align 32
 
       subroutine vec_lxvp_test_u2_arr(v1, offset, vp)
       integer :: offset
@@ -174,10 +174,10 @@ subroutine vec_lxvp_test_u2_arr(v1, offset, vp)
       end subroutine vec_lxvp_test_u2_arr
 
 !CHECK-LABEL: @vec_lxvp_test_u2_arr_
-!CHECK:  %[[offset:.*]] = load i32, ptr %1, align 4
-!CHECK:  %[[addr:.*]] = getelementptr i8, ptr %0, i32 %[[offset]]
-!CHECK:  %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]])
-!CHECK:  store <256 x i1> %[[call]], ptr %2, align 32
+!LLVMIR:  %[[offset:.*]] = load i32, ptr %1, align 4
+!LLVMIR:  %[[addr:.*]] = getelementptr i8, ptr %0, i32 %[[offset]]
+!LLVMIR:  %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]])
+!LLVMIR:  store <256 x i1> %[[call]], ptr %2, align 32
 
       subroutine vec_lxvp_test_u4_arr(v1, offset, vp)
       integer :: offset
@@ -187,10 +187,10 @@ subroutine vec_lxvp_test_u4_arr(v1, offset, vp)
       end subroutine vec_lxvp_test_u4_arr
 
 !CHECK-LABEL: @vec_lxvp_test_u4_arr_
-!CHECK:  %[[offset:.*]] = load i32, ptr %1, align 4
-!CHECK:  %[[addr:.*]] = getelementptr i8, ptr %0, i32 %[[offset]]
-!CHECK:  %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]])
-!CHECK:  store <256 x i1> %[[call]], ptr %2, align 32
+!LLVMIR:  %[[offset:.*]] = load i32, ptr %1, align 4
+!LLVMIR:  %[[addr:.*]] = getelementptr i8, ptr %0, i32 %[[offset]]
+!LLVMIR:  %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]])
+!LLVMIR:  store <256 x i1> %[[call]], ptr %2, align 32
 
       subroutine vec_lxvp_test_r4_arr(v1, offset, vp)
       integer :: offset
@@ -200,10 +200,10 @@ subroutine vec_lxvp_test_r4_arr(v1, offset, vp)
       end subroutine vec_lxvp_test_r4_arr
 
 !CHECK-LABEL: @vec_lxvp_test_r4_arr_
-!CHECK:  %[[offset:.*]] = load i32, ptr %1, align 4
-!CHECK:  %[[addr:.*]] = getelementptr i8, ptr %0, i32 %[[offset]]
-!CHECK:  %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]])
-!CHECK:  store <256 x i1> %[[call]], ptr %2, align 32
+!LLVMIR:  %[[offset:.*]] = load i32, ptr %1, align 4
+!LLVMIR:  %[[addr:.*]] = getelementptr i8, ptr %0, i32 %[[offset]]
+!LLVMIR:  %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]])
+!LLVMIR:  store <256 x i1> %[[call]], ptr %2, align 32
 
       subroutine vec_lxvp_test_r8_arr(v1, offset, vp)
       integer :: offset
@@ -213,10 +213,10 @@ subroutine vec_lxvp_test_r8_arr(v1, offset, vp)
       end subroutine vec_lxvp_test_r8_arr
 
 !CHECK-LABEL: @vec_lxvp_test_r8_arr_
-!CHECK:  %[[offset:.*]] = load i32, ptr %1, align 4
-!CHECK:  %[[addr:.*]] = getelementptr i8, ptr %0, i32 %[[offset]]
-!CHECK:  %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]])
-!CHECK:  store <256 x i1> %[[call]], ptr %2, align 32
+!LLVMIR:  %[[offset:.*]] = load i32, ptr %1, align 4
+!LLVMIR:  %[[addr:.*]] = getelementptr i8, ptr %0, i32 %[[offset]]
+!LLVMIR:  %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]])
+!LLVMIR:  store <256 x i1> %[[call]], ptr %2, align 32
 
       subroutine vec_lxvp_test_vp_arr(v1, offset, vp)
       integer(8) :: offset
@@ -226,10 +226,10 @@ subroutine vec_lxvp_test_vp_arr(v1, offset, vp)
       end subroutine vec_lxvp_test_vp_arr
 
 !CHECK-LABEL: @vec_lxvp_test_vp_arr_
-!CHECK:  %[[offset:.*]] = load i64, ptr %1, align 8
-!CHECK:  %[[addr:.*]] = getelementptr i8, ptr %0, i64 %[[offset]]
-!CHECK:  %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]])
-!CHECK:  store <256 x i1> %[[call]], ptr %2, align 32
+!LLVMIR:  %[[offset:.*]] = load i64, ptr %1, align 8
+!LLVMIR:  %[[addr:.*]] = getelementptr i8, ptr %0, i64 %[[offset]]
+!LLVMIR:  %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]])
+!LLVMIR:  store <256 x i1> %[[call]], ptr %2, align 32
 
 !----------------------
 ! vsx_lxvp
@@ -243,10 +243,10 @@ subroutine vsx_lxvp_test_i4(v1, offset, vp)
       end subroutine vsx_lxvp_test_i4
 
 !CHECK-LABEL: @vsx_lxvp_test_i4_
-!CHECK:  %[[offset:.*]] = load i16, ptr %1, align 2
-!CHECK:  %[[addr:.*]] = getelementptr i8, ptr %0, i16 %[[offset]]
-!CHECK:  %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]])
-!CHECK:  store <256 x i1> %[[call]], ptr %2, align 32
+!LLVMIR:  %[[offset:.*]] = load i16, ptr %1, align 2
+!LLVMIR:  %[[addr:.*]] = getelementptr i8, ptr %0, i16 %[[offset]]
+!LLVMIR:  %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]])
+!LLVMIR:  store <256 x i1> %[[call]], ptr %2, align 32
 
       subroutine vsx_lxvp_test_r8(v1, offset, vp)
       integer(2) :: offset
@@ -256,10 +256,10 @@ subroutine vsx_lxvp_test_r8(v1, offset, vp)
       end subroutine vsx_lxvp_test_r8
 
 !CHECK-LABEL: @vsx_lxvp_test_r8_
-!CHECK:  %[[offset:.*]] = load i16, ptr %1, align 2
-!CHECK:  %[[addr:.*]] = getelementptr i8, ptr %0, i16 %[[offset]]
-!CHECK:  %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]])
-!CHECK:  store <256 x i1> %[[call]], ptr %2, align 32
+!LLVMIR:  %[[offset:.*]] = load i16, ptr %1, align 2
+!LLVMIR:  %[[addr:.*]] = getelementptr i8, ptr %0, i16 %[[offset]]
+!LLVMIR:  %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]])
+!LLVMIR:  store <256 x i1> %[[call]], ptr %2, align 32
 
       subroutine vsx_lxvp_test_i2_arr(v1, offset, vp)
       integer :: offset
@@ -269,10 +269,10 @@ subroutine vsx_lxvp_test_i2_arr(v1, offset, vp)
       end subroutine vsx_lxvp_test_i2_arr
 
 !CHECK-LABEL: @vsx_lxvp_test_i2_arr_
-!CHECK:  %[[offset:.*]] = load i32, ptr %1, align 4
-!CHECK:  %[[addr:.*]] = getelementptr i8, ptr %0, i32 %[[offset]]
-!CHECK:  %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]])
-!CHECK:  store <256 x i1> %[[call]], ptr %2, align 32
+!LLVMIR:  %[[offset:.*]] = load i32, ptr %1, align 4
+!LLVMIR:  %[[addr:.*]] = getelementptr i8, ptr %0, i32 %[[offset]]
+!LLVMIR:  %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]])
+!LLVMIR:  store <256 x i1> %[[call]], ptr %2, align 32
 
       subroutine vsx_lxvp_test_vp_arr(v1, offset, vp)
       integer(8) :: offset
@@ -282,10 +282,10 @@ subroutine vsx_lxvp_test_vp_arr(v1, offset, vp)
       end subroutine vsx_lxvp_test_vp_arr
 
 !CHECK-LABEL: @vsx_lxvp_test_vp_arr_
-!CHECK:  %[[offset:.*]] = load i64, ptr %1, align 8
-!CHECK:  %[[addr:.*]] = getelementptr i8, ptr %0, i64 %[[offset]]
-!CHECK:  %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]])
-!CHECK:  store <256 x i1> %[[call]], ptr %2, align 32
+!LLVMIR:  %[[offset:.*]] = load i64, ptr %1, align 8
+!LLVMIR:  %[[addr:.*]] = getelementptr i8, ptr %0, i64 %[[offset]]
+!LLVMIR:  %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]])
+!LLVMIR:  store <256 x i1> %[[call]], ptr %2, align 32
 
 !----------------------
 ! mma_stxvp
@@ -300,10 +300,10 @@ subroutine test_mma_stxvp_i1(vp, offset, v1)
       end subroutine test_mma_stxvp_i1
 
 !CHECK-LABEL: @test_mma_stxvp_i1_
-!CHECK:  %[[vp:.*]] = load <256 x i1>, ptr %0, align 32
-!CHECK:  %[[offset:.*]] = load i8, ptr %1, align 1
-!CHECK:  %[[addr:.*]] = getelementptr i8, ptr %2, i8 %[[offset]]
-!CHECK:  call void @llvm.ppc.vsx.stxvp(<256 x i1> %[[vp]], ptr %[[addr]])
+!LLVMIR:  %[[vp:.*]] = load <256 x i1>, ptr %0, align 32
+!LLVMIR:  %[[offset:.*]] = load i8, ptr %1, align 1
+!LLVMIR:  %[[addr:.*]] = getelementptr i8, ptr %2, i8 %[[offset]]
+!LLVMIR:  call void @llvm.ppc.vsx.stxvp(<256 x i1> %[[vp]], ptr %[[addr]])
 
 !----------------------
 ! vec_stxvp
@@ -317,10 +317,10 @@ subroutine test_vec_stxvp_i1(vp, offset, v1)
       end subroutine test_vec_stxvp_i1
 
 !CHECK-LABEL: @test_vec_stxvp_i1_
-!CHECK:  %[[vp:.*]] = load <256 x i1>, ptr %0, align 32
-!CHECK:  %[[offset:.*]] = load i8, ptr %1, align 1
-!CHECK:  %[[addr:.*]] = getelementptr i8, ptr %2, i8 %[[offset]]
-!CHECK:  call void @llvm.ppc.vsx.stxvp(<256 x i1> %[[vp]], ptr %[[addr]])
+!LLVMIR:  %[[vp:.*]] = load <256 x i1>, ptr %0, align 32
+!LLVMIR:  %[[offset:.*]] = load i8, ptr %1, align 1
+!LLVMIR:  %[[addr:.*]] = getelementptr i8, ptr %2, i8 %[[offset]]
+!LLVMIR:  call void @llvm.ppc.vsx.stxvp(<256 x i1> %[[vp]], ptr %[[addr]])
 
       subroutine test_vec_stxvp_i8(vp, offset, v1)
       integer(8) :: offset
@@ -330,10 +330,10 @@ subroutine test_vec_stxvp_i8(vp, offset, v1)
       end subroutine test_vec_stxvp_i8
 
 !CHECK-LABEL: @test_vec_stxvp_i8_
-!CHECK:  %[[vp:.*]] = load <256 x i1>, ptr %0, align 32
-!CHECK:  %[[offset:.*]] = load i64, ptr %1, align 8
-!CHECK:  %[[addr:.*]] = getelementptr i8, ptr %2, i64 %[[offset]]
-!CHECK:  call void @llvm.ppc.vsx.stxvp(<256 x i1> %[[vp]], ptr %[[addr]])
+!LLVMIR:  %[[vp:.*]] = load <256 x i1>, ptr %0, align 32
+!LLVMIR:  %[[offset:.*]] = load i64, ptr %1, align 8
+!LLVMIR:  %[[addr:.*]] = getelementptr i8, ptr %2, i64 %[[offset]]
+!LLVMIR:  call void @llvm.ppc.vsx.stxvp(<256 x i1> %[[vp]], ptr %[[addr]])
 
       subroutine test_vec_stxvp_vi2(vp, offset, v1)
       integer(2) :: offset
@@ -343,10 +343,10 @@ subroutine test_vec_stxvp_vi2(vp, offset, v1)
       end subroutine test_vec_stxvp_vi2
 
 !CHECK-LABEL: @test_vec_stxvp_vi2_
-!CHECK:  %[[vp:.*]] = load <256 x i1>, ptr %0, align 32
-!CHECK:  %[[offset:.*]] = load i16, ptr %1, align 2
-!CHECK:  %[[addr:.*]] = getelementptr i8, ptr %2, i16 %[[offset]]
-!CHECK:  call void @llvm.ppc.vsx.stxvp(<256 x i1> %[[vp]], ptr %[[addr]])
+!LLVMIR:  %[[vp:.*]] = load <256 x i1>, ptr %0, align 32
+!LLVMIR:  %[[offset:.*]] = load i16, ptr %1, align 2
+!LLVMIR:  %[[addr:.*]] = getelementptr i8, ptr %2, i16 %[[offset]]
+!LLVMIR:  call void @llvm.ppc.vsx.stxvp(<256 x i1> %[[vp]], ptr %[[addr]])
 
       subroutine test_vec_stxvp_vi4(vp, offset, v1)
       integer(2) :: offset
@@ -356,10 +356,10 @@ subroutine test_vec_stxvp_vi4(vp, offset, v1)
       end subroutine test_vec_stxvp_vi4
 
 !CHECK-LABEL: @test_vec_stxvp_vi4_
-!CHECK:  %[[vp:.*]] = load <256 x i1>, ptr %0, align 32
-!CHECK:  %[[offset:.*]] = load i16, ptr %1, align 2
-!CHECK:  %[[addr:.*]] = getelementptr i8, ptr %2, i16 %[[offset]]
-!CHECK:  call void @llvm.ppc.vsx.stxvp(<256 x i1> %[[vp]], ptr %[[addr]])
+!LLVMIR:  %[[vp:.*]] = load <256 x i1>, ptr %0, align 32
+!LLVMIR:  %[[offset:.*]] = load i16, ptr %1, align 2
+!LLVMIR:  %[[addr:.*]] = getelementptr i8, ptr %2, i16 %[[offset]]
+!LLVMIR:  call void @llvm.ppc.vsx.stxvp(<256 x i1> %[[vp]], ptr %[[addr]])
 
       subroutine test_vec_stxvp_vu2(vp, offset, v1)
       integer(2) :: offset
@@ -369,10 +369,10 @@ subroutine test_vec_stxvp_vu2(vp, offset, v1)
       end subroutine test_vec_stxvp_vu2
 
 !CHECK-LABEL: @test_vec_stxvp_vu2_
-!CHECK:  %[[vp:.*]] = load <256 x i1>, ptr %0, align 32
-!CHECK:  %[[offset:.*]] = load i16, ptr %1, align 2
-!CHECK:  %[[addr:.*]] = getelementptr i8, ptr %2, i16 %[[offset]]
-!CHECK:  call void @llvm.ppc.vsx.stxvp(<256 x i1> %[[vp]], ptr %[[addr]])
+!LLVMIR:  %[[vp:.*]] = load <256 x i1>, ptr %0, align 32
+!LLVMIR:  %[[offset:.*]] = load i16, ptr %1, align 2
+!LLVMIR:  %[[addr:.*]] = getelementptr i8, ptr %2, i16 %[[offset]]
+!LLVMIR:  call void @llvm.ppc.vsx.stxvp(<256 x i1> %[[vp]], ptr %[[addr]])
 
       subroutine test_vec_stxvp_vu4(vp, offset, v1)
       integer(2) :: offset
@@ -382,10 +382,10 @@ subroutine test_vec_stxvp_vu4(vp, offset, v1)
       end subroutine test_vec_stxvp_vu4
 
 !CHECK-LABEL: @test_vec_stxvp_vu4_
-!CHECK:  %[[vp:.*]] = load <256 x i1>, ptr %0, align 32
-!CHECK:  %[[offset:.*]] = load i16, ptr %1, align 2
-!CHECK:  %[[addr:.*]] = getelementptr i8, ptr %2, i16 %[[offset]]
-!CHECK:  call void @llvm.ppc.vsx.stxvp(<256 x i1> %[[vp]], ptr %[[addr]])
+!LLVMIR:  %[[vp:.*]] = load <256 x i1>, ptr %0, align 32
+!LLVMIR:  %[[offset:.*]] = load i16, ptr %1, align 2
+!LLVMIR:  %[[addr:.*]] = getelementptr i8, ptr %2, i16 %[[offset]]
+!LLVMIR:  call void @llvm.ppc.vsx.stxvp(<256 x i1> %[[vp]], ptr %[[addr]])
 
       subroutine test_vec_stxvp_vr4(vp, offset, v1)
       integer(2) :: offset
@@ -395,10 +395,10 @@ subroutine test_vec_stxvp_vr4(vp, offset, v1)
       end subroutine test_vec_stxvp_vr4
 
 !CHECK-LABEL: @test_vec_stxvp_vr4_
-!CHECK:  %[[vp:.*]] = load <256 x i1>, ptr %0, align 32
-!CHECK:  %[[offset:.*]] = load i16, ptr %1, align 2
-!CHECK:  %[[addr:.*]] = getelementptr i8, ptr %2, i16 %[[offset]]
-!CHECK:  call void @llvm.ppc.vsx.stxvp(<256 x i1> %[[vp]], ptr %[[addr]])
+!LLVMIR:  %[[vp:.*]] = load <256 x i1>, ptr %0, align 32
+!LLVMIR:  %[[offset:.*]] = load i16, ptr %1, align 2
+!LLVMIR:  %[[addr:.*]] = getelementptr i8, ptr %2, i16 %[[offset]]
+!LLVMIR:  call void @llvm.ppc.vsx.stxvp(<256 x i1> %[[vp]], ptr %[[addr]])
 
       subroutine test_vec_stxvp_vr8(vp, offset, v1)
       integer(2) :: offset
@@ -408,10 +408,10 @@ subroutine test_vec_stxvp_vr8(vp, offset, v1)
       end subroutine test_vec_stxvp_vr8
 
 !CHECK-LABEL: @test_vec_stxvp_vr8_
-!CHECK:  %[[vp:.*]] = load <256 x i1>, ptr %0, align 32
-!CHECK:  %[[offset:.*]] = load i16, ptr %1, align 2
-!CHECK:  %[[addr:.*]] = getelementptr i8, ptr %2, i16 %[[offset]]
-!CHECK:  call void @llvm.ppc.vsx.stxvp(<256 x i1> %[[vp]], ptr %[[addr]])
+!LLVMIR:  %[[vp:.*]] = load <256 x i1>, ptr %0, align 32
+!LLVMIR:  %[[offset:.*]] = load i16, ptr %1, align 2
+!LLVMIR:  %[[addr:.*]] = getelementptr i8, ptr %2, i16 %[[offset]]
+!LLVMIR:  call void @llvm.ppc.vsx.stxvp(<256 x i1> %[[vp]], ptr %[[addr]])
 
       subroutine test_vec_stxvp_vvp(vp, offset, v1)
       integer(2) :: offset
@@ -421,10 +421,10 @@ subroutine test_vec_stxvp_vvp(vp, offset, v1)
       end subroutine test_vec_stxvp_vvp
 
 !CHECK-LABEL: @test_vec_stxvp_vvp_
-!CHECK:  %[[vp:.*]] = load <256 x i1>, ptr %0, align 32
-!CHECK:  %[[offset:.*]] = load i16, ptr %1, align 2
-!CHECK:  %[[addr:.*]] = getelementptr i8, ptr %2, i16 %[[offset]]
-!CHECK:  call void @llvm.ppc.vsx.stxvp(<256 x i1> %[[vp]], ptr %[[addr]])
+!LLVMIR:  %[[vp:.*]] = load <256 x i1>, ptr %0, align 32
+!LLVMIR:  %[[offset:.*]] = load i16, ptr %1, align 2
+!LLVMIR:  %[[addr:.*]] = getelementptr i8, ptr %2, i16 %[[offset]]
+!LLVMIR:  call void @llvm.ppc.vsx.stxvp(<256 x i1> %[[vp]], ptr %[[addr]])
 
       subroutine test_vec_stxvp_vi2_arr(vp, offset, v1)
       integer :: offset
@@ -434,10 +434,10 @@ subroutine test_vec_stxvp_vi2_arr(vp, offset, v1)
       end subroutine test_vec_stxvp_vi2_arr
 
 !CHECK-LABEL: @test_vec_stxvp_vi2_arr_
-!CHECK:  %[[vp:.*]] = load <256 x i1>, ptr %0, align 32
-!CHECK:  %[[offset:.*]] = load i32, ptr %1, align 4
-!CHECK:  %[[addr:.*]] = getelementptr i8, ptr %2, i32 %[[offset]]
-!CHECK:  call void @llvm.ppc.vsx.stxvp(<256 x i1> %[[vp]], ptr %[[addr]])
+!LLVMIR:  %[[vp:.*]] = load <256 x i1>, ptr %0, align 32
+!LLVMIR:  %[[offset:.*]] = load i32, ptr %1, align 4
+!LLVMIR:  %[[addr:.*]] = getelementptr i8, ptr %2, i32 %[[offset]]
+!LLVMIR:  call void @llvm.ppc.vsx.stxvp(<256 x i1> %[[vp]], ptr %[[addr]])
 
       subroutine test_vec_stxvp_vi4_arr(vp, offset, v1)
       integer :: offset
@@ -447,10 +447,10 @@ subroutine test_vec_stxvp_vi4_arr(vp, offset, v1)
       end subroutine test_vec_stxvp_vi4_arr
 
 !CHECK-LABEL: @test_vec_stxvp_vi4_arr_
-!CHECK:  %[[vp:.*]] = load <256 x i1>, ptr %0, align 32
-!CHECK:  %[[offset:.*]] = load i32, ptr %1, align 4
-!CHECK:  %[[addr:.*]] = getelementptr i8, ptr %2, i32 %[[offset]]
-!CHECK:  call void @llvm.ppc.vsx.stxvp(<256 x i1> %[[vp]], ptr %[[addr]])
+!LLVMIR:  %[[vp:.*]] = load <256 x i1>, ptr %0, align 32
+!LLVMIR:  %[[offset:.*]] = load i32, ptr %1, align 4
+!LLVMIR:  %[[addr:.*]] = getelementptr i8, ptr %2, i32 %[[offset]]
+!LLVMIR:  call void @llvm.ppc.vsx.stxvp(<256 x i1> %[[vp]], ptr %[[addr]])
 
       subroutine test_vec_stxvp_vu2_arr(vp, offset, v1)
       integer :: offset
@@ -460,10 +460,10 @@ subroutine test_vec_stxvp_vu2_arr(vp, offset, v1)
       end subroutine test_vec_stxvp_vu2_arr
 
 !CHECK-LABEL: @test_vec_stxvp_vu2_arr_
-!CHECK:  %[[vp:.*]] = load <256 x i1>, ptr %0, align 32
-!CHECK:  %[[offset:.*]] = load i32, ptr %1, align 4
-!CHECK:  %[[addr:.*]] = getelementptr i8, ptr %2, i32 %[[offset]]
-!CHECK:  call void @llvm.ppc.vsx.stxvp(<256 x i1> %[[vp]], ptr %[[addr]])
+!LLVMIR:  %[[vp:.*]] = load <256 x i1>, ptr %0, align 32
+!LLVMIR:  %[[offset:.*]] = load i32, ptr %1, align 4
+!LLVMIR:  %[[addr:.*]] = getelementptr i8, ptr %2, i32 %[[offset]]
+!LLVMIR:  call void @llvm.ppc.vsx.stxvp(<256 x i1> %[[vp]], ptr %[[addr]])
 
       subroutine test_vec_stxvp_vu4_arr(vp, offset, v1)
       integer(8) :: offset
@@ -473,10 +473,10 @@ subroutine test_vec_stxvp_vu4_arr(vp, offset, v1)
       end subroutine test_vec_stxvp_vu4_arr
 
 !CHECK-LABEL: @test_vec_stxvp_vu4_arr_
-!CHECK:  %[[vp:.*]] = load <256 x i1>, ptr %0, align 32
-!CHECK:  %[[offset:.*]] = load i64, ptr %1, align 8
-!CHECK:  %[[addr:.*]] = getelementptr i8, ptr %2, i64 %[[offset]]
-!CHECK:  call void @llvm.ppc.vsx.stxvp(<256 x i1> %[[vp]], ptr %[[addr]])
+!LLVMIR:  %[[vp:.*]] = load <256 x i1>, ptr %0, align 32
+!LLVMIR:  %[[offset:.*]] = load i64, ptr %1, align 8
+!LLVMIR:  %[[addr:.*]] = getelementptr i8, ptr %2, i64 %[[offset]]
+!LLVMIR:  call void @llvm.ppc.vsx.stxvp(<256 x i1> %[[vp]], ptr %[[addr]])
 
       subroutine test_vec_stxvp_vr4_arr(vp, offset, v1)
       integer :: offset
@@ -486,10 +486,10 @@ subroutine test_vec_stxvp_vr4_arr(vp, offset, v1)
       end subroutine test_vec_stxvp_vr4_arr
 
 !CHECK-LABEL: @test_vec_stxvp_vr4_arr_
-!CHECK:  %[[vp:.*]] = load <256 x i1>, ptr %0, align 32
-!CHECK:  %[[offset:.*]] = load i32, ptr %1, align 4
-!CHECK:  %[[addr:.*]] = getelementptr i8, ptr %2, i32 %[[offset]]
-!CHECK:  call void @llvm.ppc.vsx.stxvp(<256 x i1> %[[vp]], ptr %[[addr]])
+!LLVMIR:  %[[vp:.*]] = load <256 x i1>, ptr %0, align 32
+!LLVMIR:  %[[offset:.*]] = load i32, ptr %1, align 4
+!LLVMIR:  %[[addr:.*]] = getelementptr i8, ptr %2, i32 %[[offset]]
+!LLVMIR:  call void @llvm.ppc.vsx.stxvp(<256 x i1> %[[vp]], ptr %[[addr]])
 
       subroutine test_vec_stxvp_vr8_arr(vp, offset, v1)
       integer :: offset
@@ -499,10 +499,10 @@ subroutine test_vec_stxvp_vr8_arr(vp, offset, v1)
       end subroutine test_vec_stxvp_vr8_arr
 
 !CHECK-LABEL: @test_vec_stxvp_vr8_arr_
-!CHECK:  %[[vp:.*]] = load <256 x i1>, ptr %0, align 32
-!CHECK:  %[[offset:.*]] = load i32, ptr %1, align 4
-!CHECK:  %[[addr:.*]] = getelementptr i8, ptr %2, i32 %[[offset]]
-!CHECK:  call void @llvm.ppc.vsx.stxvp(<256 x i1> %[[vp]], ptr %[[addr]])
+!LLVMIR:  %[[vp:.*]] = load <256 x i1>, ptr %0, align 32
+!LLVMIR:  %[[offset:.*]] = load i32, ptr %1, align 4
+!LLVMIR:  %[[addr:.*]] = getelementptr i8, ptr %2, i32 %[[offset]]
+!LLVMIR:  call void @llvm.ppc.vsx.stxvp(<256 x i1> %[[vp]], ptr %[[addr]])
 
       subroutine test_vec_stxvp_vp_arr(vp, offset, v1)
       integer :: offset
@@ -512,10 +512,10 @@ subroutine test_vec_stxvp_vp_arr(vp, offset, v1)
       end subroutine test_vec_stxvp_vp_arr
 
 !CHECK-LABEL: @test_vec_stxvp_vp_arr_
-!CHECK:  %[[vp:.*]] = load <256 x i1>, ptr %0, align 32
-!CHECK:  %[[offset:.*]] = load i32, ptr %1, align 4
-!CHECK:  %[[addr:.*]] = getelementptr i8, ptr %2, i32 %[[offset]]
-!CHECK:  call void @llvm.ppc.vsx.stxvp(<256 x i1> %[[vp]], ptr %[[addr]])
+!LLVMIR:  %[[vp:.*]] = load <256 x i1>, ptr %0, align 32
+!LLVMIR:  %[[offset:.*]] = load i32, ptr %1, align 4
+!LLVMIR:  %[[addr:.*]] = getelementptr i8, ptr %2, i32 %[[offset]]
+!LLVMIR:  call void @llvm.ppc.vsx.stxvp(<256 x i1> %[[vp]], ptr %[[addr]])
 
 !----------------------
 ! vsx_stxvp
@@ -529,10 +529,10 @@ subroutine test_vsx_stxvp_i1(vp, offset, v1)
       end subroutine test_vsx_stxvp_i1
 
 !CHECK-LABEL: @test_vsx_stxvp_i1_
-!CHECK:  %[[vp:.*]] = load <256 x i1>, ptr %0, align 32
-!CHECK:  %[[offset:.*]] = load i8, ptr %1, align 1
-!CHECK:  %[[addr:.*]] = getelementptr i8, ptr %2, i8 %[[offset]]
-!CHECK:  call void @llvm.ppc.vsx.stxvp(<256 x i1> %[[vp]], ptr %[[addr]])
+!LLVMIR:  %[[vp:.*]] = load <256 x i1>, ptr %0, align 32
+!LLVMIR:  %[[offset:.*]] = load i8, ptr %1, align 1
+!LLVMIR:  %[[addr:.*]] = getelementptr i8, ptr %2, i8 %[[offset]]
+!LLVMIR:  call void @llvm.ppc.vsx.stxvp(<256 x i1> %[[vp]], ptr %[[addr]])
 
       subroutine test_vsx_stxvp_vi2(vp, offset, v1)
       integer(2) :: offset
@@ -542,10 +542,10 @@ subroutine test_vsx_stxvp_vi2(vp, offset, v1)
       end subroutine test_vsx_stxvp_vi2
 
 !CHECK-LABEL: @test_vsx_stxvp_vi2_
-!CHECK:  %[[vp:.*]] = load <256 x i1>, ptr %0, align 32
-!CHECK:  %[[offset:.*]] = load i16, ptr %1, align 2
-!CHECK:  %[[addr:.*]] = getelementptr i8, ptr %2, i16 %[[offset]]
-!CHECK:  call void @llvm.ppc.vsx.stxvp(<256 x i1> %[[vp]], ptr %[[addr]])
+!LLVMIR:  %[[vp:.*]] = load <256 x i1>, ptr %0, align 32
+!LLVMIR:  %[[offset:.*]] = load i16, ptr %1, align 2
+!LLVMIR:  %[[addr:.*]] = getelementptr i8, ptr %2, i16 %[[offset]]
+!LLVMIR:  call void @llvm.ppc.vsx.stxvp(<256 x i1> %[[vp]], ptr %[[addr]])
 
       subroutine test_vsx_stxvp_vr8_arr(vp, offset, v1)
       integer :: offset
@@ -555,10 +555,10 @@ subroutine test_vsx_stxvp_vr8_arr(vp, offset, v1)
       end subroutine test_vsx_stxvp_vr8_arr
 
 !CHECK-LABEL: @test_vsx_stxvp_vr8_arr_
-!CHECK:  %[[vp:.*]] = load <256 x i1>, ptr %0, align 32
-!CHECK:  %[[offset:.*]] = load i32, ptr %1, align 4
-!CHECK:  %[[addr:.*]] = getelementptr i8, ptr %2, i32 %[[offset]]
-!CHECK:  call void @llvm.ppc.vsx.stxvp(<256 x i1> %[[vp]], ptr %[[addr]])
+!LLVMIR:  %[[vp:.*]] = load <256 x i1>, ptr %0, align 32
+!LLVMIR:  %[[offset:.*]] = load i32, ptr %1, align 4
+!LLVMIR:  %[[addr:.*]] = getelementptr i8, ptr %2, i32 %[[offset]]
+!LLVMIR:  call void @llvm.ppc.vsx.stxvp(<256 x i1> %[[vp]], ptr %[[addr]])
 
       subroutine test_vsx_stxvp_vp_arr(vp, offset, v1)
       integer :: offset
@@ -568,7 +568,7 @@ subroutine test_vsx_stxvp_vp_arr(vp, offset, v1)
       end subroutine test_vsx_stxvp_vp_arr
 
 !CHECK-LABEL: @test_vsx_stxvp_vp_arr_
-!CHECK:  %[[vp:.*]] = load <256 x i1>, ptr %0, align 32
-!CHECK:  %[[offset:.*]] = load i32, ptr %1, align 4
-!CHECK:  %[[addr:.*]] = getelementptr i8, ptr %2, i32 %[[offset]]
-!CHECK:  call void @llvm.ppc.vsx.stxvp(<256 x i1> %[[vp]], ptr %[[addr]])
+!LLVMIR:  %[[vp:.*]] = load <256 x i1>, ptr %0, align 32
+!LLVMIR:  %[[offset:.*]] = load i32, ptr %1, align 4
+!LLVMIR:  %[[addr:.*]] = getelementptr i8, ptr %2, i32 %[[offset]]
+!LLVMIR:  call void @llvm.ppc.vsx.stxvp(<256 x i1> %[[vp]], ptr %[[addr]])

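One practical effect of the RUN-line change above is worth noting: FileCheck only activates the prefixes named in --check-prefixes, so once the prefix is "LLVMIR", any remaining !CHECK-LABEL lines in a file are treated as plain comments rather than checks. The pattern these files now follow is:

  ! RUN: %flang_fc1 -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes="LLVMIR" %s
  !LLVMIR: %[[x:.*]] = load <256 x i1>, ptr %0, align 32

The -flang-experimental-hlfir flag routes lowering through the HLFIR path this patch fixes; the emitted LLVM IR is matched directly, which is also why the intermediate FIR/MLIR checks could be dropped.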
diff  --git a/flang/test/Lower/PowerPC/ppc-vec-abs.f90 b/flang/test/Lower/PowerPC/ppc-vec-abs.f90
new file mode 100644
index 000000000000000..33ff1b603acccfe
--- /dev/null
+++ b/flang/test/Lower/PowerPC/ppc-vec-abs.f90
@@ -0,0 +1,64 @@
+! RUN: %flang_fc1 -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes="LLVMIR" %s
+! REQUIRES: target=powerpc{{.*}}
+
+!----------------------
+! vec_abs
+!----------------------
+
+! CHECK-LABEL: vec_abs_i1
+subroutine vec_abs_i1(arg1)
+  vector(integer(1)) :: arg1, r
+  r = vec_abs(arg1)
+
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[sub:.*]] = sub <16 x i8> zeroinitializer, %[[arg1]]
+! LLVMIR: %{{[0-9]+}} = call <16 x i8> @llvm.ppc.altivec.vmaxsb(<16 x i8> %[[sub]], <16 x i8> %[[arg1]])
+end subroutine vec_abs_i1
+
+! CHECK-LABEL: vec_abs_i2
+subroutine vec_abs_i2(arg1)
+  vector(integer(2)) :: arg1, r
+  r = vec_abs(arg1)
+
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[sub:.*]] = sub <8 x i16> zeroinitializer, %[[arg1]]
+! LLVMIR: %{{[0-9]+}} = call <8 x i16> @llvm.ppc.altivec.vmaxsh(<8 x i16> %[[sub]], <8 x i16> %[[arg1]])
+end subroutine vec_abs_i2
+
+! CHECK-LABEL: vec_abs_i4
+subroutine vec_abs_i4(arg1)
+  vector(integer(4)) :: arg1, r
+  r = vec_abs(arg1)
+
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[sub:.*]] = sub <4 x i32> zeroinitializer, %[[arg1]]
+! LLVMIR: %{{[0-9]+}} = call <4 x i32> @llvm.ppc.altivec.vmaxsw(<4 x i32> %[[sub]], <4 x i32> %[[arg1]])
+end subroutine vec_abs_i4
+
+! CHECK-LABEL: vec_abs_i8
+subroutine vec_abs_i8(arg1)
+  vector(integer(8)) :: arg1, r
+  r = vec_abs(arg1)
+
+! LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[sub:.*]] = sub <2 x i64> zeroinitializer, %[[arg1]]
+! LLVMIR: %{{[0-9]+}} = call <2 x i64> @llvm.ppc.altivec.vmaxsd(<2 x i64> %[[sub]], <2 x i64> %[[arg1]])
+end subroutine vec_abs_i8
+
+! CHECK-LABEL: vec_abs_r4
+subroutine vec_abs_r4(arg1)
+  vector(real(4)) :: arg1, r
+  r = vec_abs(arg1)
+
+! LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! LLVMIR: %{{[0-9]+}} = call contract <4 x float> @llvm.fabs.v4f32(<4 x float> %[[arg1]])
+end subroutine vec_abs_r4
+
+! CHECK-LABEL: vec_abs_r8
+subroutine vec_abs_r8(arg1)
+  vector(real(8)) :: arg1, r
+  r = vec_abs(arg1)
+
+! LLVMIR: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! LLVMIR: %{{[0-9]+}} = call contract <2 x double> @llvm.fabs.v2f64(<2 x double> %[[arg1]])
+end subroutine vec_abs_r8

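The integer vec_abs lowering checked in the new file above relies on the identity |x| = max(0 - x, x) over the signed element type, hence the sub from zeroinitializer followed by the signed vmaxs* AltiVec intrinsic; the real variants go straight to llvm.fabs. As a worked example for one i8 lane: for x = -5 the sub yields 5 and vmaxsb(5, -5) = 5, and for x = 5 it yields max(-5, 5) = 5. The IR shape being matched is:

  %neg = sub <16 x i8> zeroinitializer, %x
  %abs = call <16 x i8> @llvm.ppc.altivec.vmaxsb(<16 x i8> %neg, <16 x i8> %x)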
diff  --git a/flang/test/Lower/PowerPC/ppc-vec-add-and-mul-sub-xor.f90 b/flang/test/Lower/PowerPC/ppc-vec-add-and-mul-sub-xor.f90
new file mode 100644
index 000000000000000..bea1eaa8bc42d02
--- /dev/null
+++ b/flang/test/Lower/PowerPC/ppc-vec-add-and-mul-sub-xor.f90
@@ -0,0 +1,529 @@
+! RUN: %flang_fc1 -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes="LLVMIR" %s
+! REQUIRES: target=powerpc{{.*}}
+
+! vec_add
+
+! CHECK-LABEL: vec_add_testf32
+subroutine vec_add_testf32(x, y)
+  vector(real(4)) :: vsum, x, y
+  vsum = vec_add(x, y)
+
+! LLVMIR: %[[x:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[y:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %{{[0-9]}} = fadd contract <4 x float> %[[x]], %[[y]]
+end subroutine vec_add_testf32
+
+! CHECK-LABEL: vec_add_testf64
+subroutine vec_add_testf64(x, y)
+  vector(real(8)) :: vsum, x, y
+  vsum = vec_add(x, y)
+
+! LLVMIR: %[[x:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[y:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %{{[0-9]}} = fadd contract <2 x double> %[[x]], %[[y]]
+end subroutine vec_add_testf64
+
+! CHECK-LABEL: vec_add_testi8
+subroutine vec_add_testi8(x, y)
+  vector(integer(1)) :: vsum, x, y
+  vsum = vec_add(x, y)
+
+! LLVMIR: %[[x:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[y:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %{{[0-9]}} = add <16 x i8> %[[x]], %[[y]]
+end subroutine vec_add_testi8
+
+! CHECK-LABEL: vec_add_testi16
+subroutine vec_add_testi16(x, y)
+  vector(integer(2)) :: vsum, x, y
+  vsum = vec_add(x, y)
+
+! LLVMIR: %[[x:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[y:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %{{[0-9]}} = add <8 x i16> %[[x]], %[[y]]
+end subroutine vec_add_testi16
+
+! CHECK-LABEL: vec_add_testi32
+subroutine vec_add_testi32(x, y)
+  vector(integer(4)) :: vsum, x, y
+  vsum = vec_add(x, y)
+
+! LLVMIR: %[[x:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[y:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %{{[0-9]}} = add <4 x i32> %[[x]], %[[y]]
+end subroutine vec_add_testi32
+
+! CHECK-LABEL: vec_add_testi64
+subroutine vec_add_testi64(x, y)
+  vector(integer(8)) :: vsum, x, y
+  vsum = vec_add(x, y)
+
+! LLVMIR: %[[x:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[y:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %{{[0-9]}} = add <2 x i64> %[[x]], %[[y]]
+end subroutine vec_add_testi64
+
+! CHECK-LABEL: vec_add_testui8
+subroutine vec_add_testui8(x, y)
+  vector(unsigned(1)) :: vsum, x, y
+  vsum = vec_add(x, y)
+
+! LLVMIR: %[[x:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[y:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %{{[0-9]}} = add <16 x i8> %[[x]], %[[y]]
+end subroutine vec_add_testui8
+
+! CHECK-LABEL: vec_add_testui16
+subroutine vec_add_testui16(x, y)
+  vector(unsigned(2)) :: vsum, x, y
+  vsum = vec_add(x, y)
+
+! LLVMIR: %[[x:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[y:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %{{[0-9]}} = add <8 x i16> %[[x]], %[[y]]
+end subroutine vec_add_testui16
+
+! CHECK-LABEL: vec_add_testui32
+subroutine vec_add_testui32(x, y)
+  vector(unsigned(4)) :: vsum, x, y
+  vsum = vec_add(x, y)
+
+! LLVMIR: %[[x:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[y:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %{{[0-9]}} = add <4 x i32> %[[x]], %[[y]]
+end subroutine vec_add_testui32
+
+! CHECK-LABEL: vec_add_testui64
+subroutine vec_add_testui64(x, y)
+  vector(unsigned(8)) :: vsum, x, y
+  vsum = vec_add(x, y)
+
+! LLVMIR: %[[x:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[y:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %{{[0-9]}} = add <2 x i64> %[[x]], %[[y]]
+end subroutine vec_add_testui64
+
+! vec_mul
+
+! CHECK-LABEL: vec_mul_testf32
+subroutine vec_mul_testf32(x, y)
+  vector(real(4)) :: vmul, x, y
+  vmul = vec_mul(x, y)
+
+! LLVMIR: %[[x:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[y:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %{{[0-9]}} = fmul contract <4 x float> %[[x]], %[[y]]
+end subroutine vec_mul_testf32
+
+! CHECK-LABEL: vec_mul_testf64
+subroutine vec_mul_testf64(x, y)
+  vector(real(8)) :: vmul, x, y
+  vmul = vec_mul(x, y)
+
+! LLVMIR: %[[x:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[y:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %{{[0-9]}} = fmul contract <2 x double> %[[x]], %[[y]]
+end subroutine vec_mul_testf64
+
+! CHECK-LABEL: vec_mul_testi8
+subroutine vec_mul_testi8(x, y)
+  vector(integer(1)) :: vmul, x, y
+  vmul = vec_mul(x, y)
+
+! LLVMIR: %[[x:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[y:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %{{[0-9]}} = mul <16 x i8> %[[x]], %[[y]]
+end subroutine vec_mul_testi8
+
+! CHECK-LABEL: vec_mul_testi16
+subroutine vec_mul_testi16(x, y)
+  vector(integer(2)) :: vmul, x, y
+  vmul = vec_mul(x, y)
+
+! LLVMIR: %[[x:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[y:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %{{[0-9]}} = mul <8 x i16> %[[x]], %[[y]]
+end subroutine vec_mul_testi16
+
+! CHECK-LABEL: vec_mul_testi32
+subroutine vec_mul_testi32(x, y)
+  vector(integer(4)) :: vmul, x, y
+  vmul = vec_mul(x, y)
+
+! LLVMIR: %[[x:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[y:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %{{[0-9]}} = mul <4 x i32> %[[x]], %[[y]]
+end subroutine vec_mul_testi32
+
+! CHECK-LABEL: vec_mul_testi64
+subroutine vec_mul_testi64(x, y)
+  vector(integer(8)) :: vmul, x, y
+  vmul = vec_mul(x, y)
+
+! LLVMIR: %[[x:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[y:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %{{[0-9]}} = mul <2 x i64> %[[x]], %[[y]]
+end subroutine vec_mul_testi64
+
+! CHECK-LABEL: vec_mul_testui8
+subroutine vec_mul_testui8(x, y)
+  vector(unsigned(1)) :: vmul, x, y
+  vmul = vec_mul(x, y)
+
+! LLVMIR: %[[x:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[y:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %{{[0-9]}} = mul <16 x i8> %[[x]], %[[y]]
+end subroutine vec_mul_testui8
+
+! CHECK-LABEL: vec_mul_testui16
+subroutine vec_mul_testui16(x, y)
+  vector(unsigned(2)) :: vmul, x, y
+  vmul = vec_mul(x, y)
+
+! LLVMIR: %[[x:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[y:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %{{[0-9]}} = mul <8 x i16> %[[x]], %[[y]]
+end subroutine vec_mul_testui16
+
+! CHECK-LABEL: vec_mul_testui32
+subroutine vec_mul_testui32(x, y)
+  vector(unsigned(4)) :: vmul, x, y
+  vmul = vec_mul(x, y)
+
+! LLVMIR: %[[x:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[y:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %{{[0-9]}} = mul <4 x i32> %[[x]], %[[y]]
+end subroutine vec_mul_testui32
+
+! CHECK-LABEL: vec_mul_testui64
+subroutine vec_mul_testui64(x, y)
+  vector(unsigned(8)) :: vmul, x, y
+  vmul = vec_mul(x, y)
+
+! LLVMIR: %[[x:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[y:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %{{[0-9]}} = mul <2 x i64> %[[x]], %[[y]]
+end subroutine vec_mul_testui64
+
+! vec_sub
+
+! CHECK-LABEL: vec_sub_testf32
+subroutine vec_sub_testf32(x, y)
+  vector(real(4)) :: vsub, x, y
+  vsub = vec_sub(x, y)
+
+! LLVMIR: %[[x:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[y:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %{{[0-9]}} = fsub contract <4 x float> %[[x]], %[[y]]
+end subroutine vec_sub_testf32
+
+! CHECK-LABEL: vec_sub_testf64
+subroutine vec_sub_testf64(x, y)
+  vector(real(8)) :: vsub, x, y
+  vsub = vec_sub(x, y)
+
+! LLVMIR: %[[x:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[y:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %{{[0-9]}} = fsub contract <2 x double> %[[x]], %[[y]]
+end subroutine vec_sub_testf64
+
+! CHECK-LABEL: vec_sub_testi8
+subroutine vec_sub_testi8(x, y)
+  vector(integer(1)) :: vsub, x, y
+  vsub = vec_sub(x, y)
+
+! LLVMIR: %[[x:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[y:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %{{[0-9]}} = sub <16 x i8> %[[x]], %[[y]]
+end subroutine vec_sub_testi8
+
+! CHECK-LABEL: vec_sub_testi16
+subroutine vec_sub_testi16(x, y)
+  vector(integer(2)) :: vsub, x, y
+  vsub = vec_sub(x, y)
+
+! LLVMIR: %[[x:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[y:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %{{[0-9]}} = sub <8 x i16> %[[x]], %[[y]]
+end subroutine vec_sub_testi16
+
+! CHECK-LABEL: vec_sub_testi32
+subroutine vec_sub_testi32(x, y)
+  vector(integer(4)) :: vsub, x, y
+  vsub = vec_sub(x, y)
+
+! LLVMIR: %[[x:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[y:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %{{[0-9]}} = sub <4 x i32> %[[x]], %[[y]]
+end subroutine vec_sub_testi32
+
+! CHECK-LABEL: vec_sub_testi64
+subroutine vec_sub_testi64(x, y)
+  vector(integer(8)) :: vsub, x, y
+  vsub = vec_sub(x, y)
+
+! LLVMIR: %[[x:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[y:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %{{[0-9]}} = sub <2 x i64> %[[x]], %[[y]]
+end subroutine vec_sub_testi64
+
+! CHECK-LABEL: vec_sub_testui8
+subroutine vec_sub_testui8(x, y)
+  vector(unsigned(1)) :: vsub, x, y
+  vsub = vec_sub(x, y)
+
+! LLVMIR: %[[x:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[y:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %{{[0-9]}} = sub <16 x i8> %[[x]], %[[y]]
+end subroutine vec_sub_testui8
+
+! CHECK-LABEL: vec_sub_testui16
+subroutine vec_sub_testui16(x, y)
+  vector(unsigned(2)) :: vsub, x, y
+  vsub = vec_sub(x, y)
+
+! LLVMIR: %[[x:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[y:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %{{[0-9]}} = sub <8 x i16> %[[x]], %[[y]]
+end subroutine vec_sub_testui16
+
+! CHECK-LABEL: vec_sub_testui32
+subroutine vec_sub_testui32(x, y)
+  vector(unsigned(4)) :: vsub, x, y
+  vsub = vec_sub(x, y)
+
+! LLVMIR: %[[x:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[y:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %{{[0-9]}} = sub <4 x i32> %[[x]], %[[y]]
+end subroutine vec_sub_testui32
+
+! CHECK-LABEL: vec_sub_testui64
+subroutine vec_sub_testui64(x, y)
+  vector(unsigned(8)) :: vsub, x, y
+  vsub = vec_sub(x, y)
+
+! LLVMIR: %[[x:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[y:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %{{[0-9]}} = sub <2 x i64> %[[x]], %[[y]]
+end subroutine vec_sub_testui64
+
+!----------------------
+! vec_and
+!----------------------
+
+! CHECK-LABEL: vec_and_test_i8
+subroutine vec_and_test_i8(arg1, arg2)
+  vector(integer(1)) :: r, arg1, arg2
+  r = vec_and(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %{{[0-9]+}} = and <16 x i8> %[[arg1]], %[[arg2]]
+end subroutine vec_and_test_i8
+
+! CHECK-LABEL: vec_and_test_i16
+subroutine vec_and_test_i16(arg1, arg2)
+  vector(integer(2)) :: r, arg1, arg2
+  r = vec_and(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %{{[0-9]+}} = and <8 x i16> %[[arg1]], %[[arg2]]
+end subroutine vec_and_test_i16
+
+! CHECK-LABEL: vec_and_test_i32
+subroutine vec_and_test_i32(arg1, arg2)
+  vector(integer(4)) :: r, arg1, arg2
+  r = vec_and(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %{{[0-9]+}} = and <4 x i32> %[[arg1]], %[[arg2]]
+end subroutine vec_and_test_i32
+
+! CHECK-LABEL: vec_and_test_i64
+subroutine vec_and_test_i64(arg1, arg2)
+  vector(integer(8)) :: r, arg1, arg2
+  r = vec_and(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %{{[0-9]+}} = and <2 x i64> %[[arg1]], %[[arg2]]
+end subroutine vec_and_test_i64
+
+! CHECK-LABEL: vec_and_test_u8
+subroutine vec_and_test_u8(arg1, arg2)
+  vector(unsigned(1)) :: r, arg1, arg2
+  r = vec_and(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %{{[0-9]+}} = and <16 x i8> %[[arg1]], %[[arg2]]
+end subroutine vec_and_test_u8
+
+! CHECK-LABEL: vec_and_test_u16
+subroutine vec_and_test_u16(arg1, arg2)
+  vector(unsigned(2)) :: r, arg1, arg2
+  r = vec_and(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %{{[0-9]+}} = and <8 x i16> %[[arg1]], %[[arg2]]
+end subroutine vec_and_test_u16
+
+! CHECK-LABEL: vec_and_test_u32
+subroutine vec_and_test_u32(arg1, arg2)
+  vector(unsigned(4)) :: r, arg1, arg2
+  r = vec_and(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %{{[0-9]+}} = and <4 x i32> %[[arg1]], %[[arg2]]
+end subroutine vec_and_test_u32
+
+! CHECK-LABEL: vec_and_test_u64
+subroutine vec_and_test_u64(arg1, arg2)
+  vector(unsigned(8)) :: r, arg1, arg2
+  r = vec_and(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %{{[0-9]+}} = and <2 x i64> %[[arg1]], %[[arg2]]
+end subroutine vec_and_test_u64
+
+! CHECK-LABEL: vec_and_testf32
+subroutine vec_and_testf32(arg1, arg2)
+  vector(real(4)) :: r, arg1, arg2
+  r = vec_and(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! LLVMIR: %[[bc1:.*]] = bitcast <4 x float> %[[arg1]] to <4 x i32>
+! LLVMIR: %[[bc2:.*]] = bitcast <4 x float> %[[arg2]] to <4 x i32>
+! LLVMIR: %[[r:.*]] = and <4 x i32> %[[bc1]], %[[bc2]]
+! LLVMIR: %{{[0-9]+}} = bitcast <4 x i32> %[[r]] to <4 x float>
+end subroutine vec_and_testf32
+
+! CHECK-LABEL: vec_and_testf64
+subroutine vec_and_testf64(arg1, arg2)
+  vector(real(8)) :: r, arg1, arg2
+  r = vec_and(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! LLVMIR: %[[bc1:.*]] = bitcast <2 x double> %[[arg1]] to <2 x i64>
+! LLVMIR: %[[bc2:.*]] = bitcast <2 x double> %[[arg2]] to <2 x i64>
+! LLVMIR: %[[r:.*]] = and <2 x i64> %[[bc1]], %[[bc2]]
+! LLVMIR: %{{[0-9]+}} = bitcast <2 x i64> %[[r]] to <2 x double>
+end subroutine vec_and_testf64
+
+!----------------------
+! vec_xor
+!----------------------
+
+! CHECK-LABEL: vec_xor_test_i8
+subroutine vec_xor_test_i8(arg1, arg2)
+  vector(integer(1)) :: r, arg1, arg2
+  r = vec_xor(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %{{[0-9]+}} = xor <16 x i8> %[[arg1]], %[[arg2]]
+end subroutine vec_xor_test_i8
+
+! CHECK-LABEL: vec_xor_test_i16
+subroutine vec_xor_test_i16(arg1, arg2)
+  vector(integer(2)) :: r, arg1, arg2
+  r = vec_xor(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %{{[0-9]+}} = xor <8 x i16> %[[arg1]], %[[arg2]]
+end subroutine vec_xor_test_i16
+
+! CHECK-LABEL: vec_xor_test_i32
+subroutine vec_xor_test_i32(arg1, arg2)
+  vector(integer(4)) :: r, arg1, arg2
+  r = vec_xor(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %{{[0-9]+}} = xor <4 x i32> %[[arg1]], %[[arg2]]
+end subroutine vec_xor_test_i32
+
+! CHECK-LABEL: vec_xor_test_i64
+subroutine vec_xor_test_i64(arg1, arg2)
+  vector(integer(8)) :: r, arg1, arg2
+  r = vec_xor(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %{{[0-9]+}} = xor <2 x i64> %[[arg1]], %[[arg2]]
+end subroutine vec_xor_test_i64
+
+! CHECK-LABEL: vec_xor_test_u8
+subroutine vec_xor_test_u8(arg1, arg2)
+  vector(unsigned(1)) :: r, arg1, arg2
+  r = vec_xor(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %{{[0-9]+}} = xor <16 x i8> %[[arg1]], %[[arg2]]
+end subroutine vec_xor_test_u8
+
+! CHECK-LABEL: vec_xor_test_u16
+subroutine vec_xor_test_u16(arg1, arg2)
+  vector(unsigned(2)) :: r, arg1, arg2
+  r = vec_xor(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %{{[0-9]+}} = xor <8 x i16> %[[arg1]], %[[arg2]]
+end subroutine vec_xor_test_u16
+
+! CHECK-LABEL: vec_xor_test_u32
+subroutine vec_xor_test_u32(arg1, arg2)
+  vector(unsigned(4)) :: r, arg1, arg2
+  r = vec_xor(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %{{[0-9]+}} = xor <4 x i32> %[[arg1]], %[[arg2]]
+end subroutine vec_xor_test_u32
+
+! CHECK-LABEL: vec_xor_test_u64
+subroutine vec_xor_test_u64(arg1, arg2)
+  vector(unsigned(8)) :: r, arg1, arg2
+  r = vec_xor(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %{{[0-9]+}} = xor <2 x i64> %[[arg1]], %[[arg2]]
+end subroutine vec_xor_test_u64
+
+! CHECK-LABEL: vec_xor_testf32
+subroutine vec_xor_testf32(arg1, arg2)
+  vector(real(4)) :: r, arg1, arg2
+  r = vec_xor(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! LLVMIR: %[[bc1:.*]] = bitcast <4 x float> %[[arg1]] to <4 x i32>
+! LLVMIR: %[[bc2:.*]] = bitcast <4 x float> %[[arg2]] to <4 x i32>
+! LLVMIR: %[[r:.*]] = xor <4 x i32> %[[bc1]], %[[bc2]]
+! LLVMIR: %{{[0-9]+}} = bitcast <4 x i32> %[[r]] to <4 x float>
+end subroutine vec_xor_testf32
+
+! CHECK-LABEL: vec_xor_testf64
+subroutine vec_xor_testf64(arg1, arg2)
+  vector(real(8)) :: r, arg1, arg2
+  r = vec_xor(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! LLVMIR: %[[bc1:.*]] = bitcast <2 x double> %[[arg1]] to <2 x i64>
+! LLVMIR: %[[bc2:.*]] = bitcast <2 x double> %[[arg2]] to <2 x i64>
+! LLVMIR: %[[r:.*]] = xor <2 x i64> %[[bc1]], %[[bc2]]
+! LLVMIR: %{{[0-9]+}} = bitcast <2 x i64> %[[r]] to <2 x double>
+end subroutine vec_xor_testf64
+

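In the vec_and/vec_xor tests above, the real(4) and real(8) variants take a detour because LLVM's and/xor instructions are defined only on integer types: the operands are bitcast to the same-width integer vector, combined bitwise, and bitcast back. The IR shape being matched, shown here for the <4 x float> case:

  %b1 = bitcast <4 x float> %x to <4 x i32>
  %b2 = bitcast <4 x float> %y to <4 x i32>
  %r  = and <4 x i32> %b1, %b2
  %rf = bitcast <4 x i32> %r to <4 x float>

The integer variants need no bitcasts and lower to a single add/mul/sub/and/xor instruction, as the checks show.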
diff  --git a/flang/test/Lower/PowerPC/ppc-vec-any.f90 b/flang/test/Lower/PowerPC/ppc-vec-any.f90
new file mode 100644
index 000000000000000..3edf65ccadbd442
--- /dev/null
+++ b/flang/test/Lower/PowerPC/ppc-vec-any.f90
@@ -0,0 +1,117 @@
+! RUN: %flang_fc1 -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes="LLVMIR" %s
+! REQUIRES: target=powerpc{{.*}}
+
+!----------------------
+! vec_any_ge
+!----------------------
+
+! CHECK-LABEL: vec_any_ge_test_i1
+subroutine vec_any_ge_test_i1(arg1, arg2)
+  vector(integer(1)), intent(in) :: arg1, arg2
+  integer(4) :: r
+  r = vec_any_ge(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %{{[0-9]+}} = call i32 @llvm.ppc.altivec.vcmpgtsb.p(i32 3, <16 x i8> %[[arg2]], <16 x i8> %[[arg1]])
+end subroutine vec_any_ge_test_i1
+
+! CHECK-LABEL: vec_any_ge_test_i2
+subroutine vec_any_ge_test_i2(arg1, arg2)
+  vector(integer(2)), intent(in) :: arg1, arg2
+  integer(4) :: r
+  r = vec_any_ge(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %{{[0-9]+}} = call i32 @llvm.ppc.altivec.vcmpgtsh.p(i32 3, <8 x i16> %[[arg2]], <8 x i16> %[[arg1]])
+end subroutine vec_any_ge_test_i2
+
+! CHECK-LABEL: vec_any_ge_test_i4
+subroutine vec_any_ge_test_i4(arg1, arg2)
+  vector(integer(4)), intent(in) :: arg1, arg2
+  integer(4) :: r
+  r = vec_any_ge(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %{{[0-9]+}} = call i32 @llvm.ppc.altivec.vcmpgtsw.p(i32 3, <4 x i32> %[[arg2]], <4 x i32> %[[arg1]])
+end subroutine vec_any_ge_test_i4
+
+! CHECK-LABEL: vec_any_ge_test_i8
+subroutine vec_any_ge_test_i8(arg1, arg2)
+  vector(integer(8)), intent(in) :: arg1, arg2
+  integer(4) :: r
+  r = vec_any_ge(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %{{[0-9]+}} = call i32 @llvm.ppc.altivec.vcmpgtsd.p(i32 3, <2 x i64> %[[arg2]], <2 x i64> %[[arg1]])
+end subroutine vec_any_ge_test_i8
+
+! CHECK-LABEL: vec_any_ge_test_u1
+subroutine vec_any_ge_test_u1(arg1, arg2)
+  vector(unsigned(1)), intent(in) :: arg1, arg2
+  integer(4) :: r
+  r = vec_any_ge(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %{{[0-9]+}} = call i32 @llvm.ppc.altivec.vcmpgtub.p(i32 3, <16 x i8> %[[arg2]], <16 x i8> %[[arg1]])
+end subroutine vec_any_ge_test_u1
+
+! CHECK-LABEL: vec_any_ge_test_u2
+subroutine vec_any_ge_test_u2(arg1, arg2)
+  vector(unsigned(2)), intent(in) :: arg1, arg2
+  integer(4) :: r
+  r = vec_any_ge(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %{{[0-9]+}} = call i32 @llvm.ppc.altivec.vcmpgtuh.p(i32 3, <8 x i16> %[[arg2]], <8 x i16> %[[arg1]])
+end subroutine vec_any_ge_test_u2
+
+! CHECK-LABEL: vec_any_ge_test_u4
+subroutine vec_any_ge_test_u4(arg1, arg2)
+  vector(unsigned(4)), intent(in) :: arg1, arg2
+  integer(4) :: r
+  r = vec_any_ge(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %{{[0-9]+}} = call i32 @llvm.ppc.altivec.vcmpgtuw.p(i32 3, <4 x i32> %[[arg2]], <4 x i32> %[[arg1]])
+end subroutine vec_any_ge_test_u4
+
+! CHECK-LABEL: vec_any_ge_test_u8
+subroutine vec_any_ge_test_u8(arg1, arg2)
+  vector(unsigned(8)), intent(in) :: arg1, arg2
+  integer(4) :: r
+  r = vec_any_ge(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %{{[0-9]+}} = call i32 @llvm.ppc.altivec.vcmpgtud.p(i32 3, <2 x i64> %[[arg2]], <2 x i64> %[[arg1]])
+end subroutine vec_any_ge_test_u8
+
+! CHECK-LABEL: vec_any_ge_test_r4
+subroutine vec_any_ge_test_r4(arg1, arg2)
+  vector(real(4)), intent(in) :: arg1, arg2
+  integer(4) :: r
+  r = vec_any_ge(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! LLVMIR: %{{[0-9]+}} = call i32 @llvm.ppc.vsx.xvcmpgesp.p(i32 1, <4 x float> %[[arg1]], <4 x float> %[[arg2]])
+end subroutine vec_any_ge_test_r4
+
+! CHECK-LABEL: vec_any_ge_test_r8
+subroutine vec_any_ge_test_r8(arg1, arg2)
+  vector(real(8)), intent(in) :: arg1, arg2
+  integer(4) :: r
+  r = vec_any_ge(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! LLVMIR: %{{[0-9]+}} = call i32 @llvm.ppc.vsx.xvcmpgedp.p(i32 1, <2 x double> %[[arg1]], <2 x double> %[[arg2]])
+end subroutine vec_any_ge_test_r8
+

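A note on the predicate intrinsics checked above, plus a sketch. The vec_any_* functions lower to the AltiVec/VSX ".p" predicate variants, whose first i32 argument selects how the CR6 condition result is interpreted. For the integer types, vec_any_ge(arg1, arg2) is computed as "not all(arg2 > arg1)", which is why the checks expect vcmpgt*.p with code 3 and swapped operands; the real types call xvcmpge*.p directly with code 1 ("not all false"). These codes plausibly correspond to Clang's __CR6_LT_REV and __CR6_EQ_REV constants from altivec.h -- an inference from the IR above, not something the patch states. A minimal sketch of the pattern, in the style of the tests; the subroutine name is hypothetical:

  ! Hypothetical example (not part of this patch): any(arg1 >= arg2)
  ! for vector(integer(4)) is expected to produce
  !   call i32 @llvm.ppc.altivec.vcmpgtsw.p(i32 3, <4 x i32> %arg2, <4 x i32> %arg1)
  ! i.e. "not all(arg2 > arg1)".
  subroutine any_ge_sketch(arg1, arg2, r)
    vector(integer(4)), intent(in) :: arg1, arg2
    integer(4), intent(out) :: r
    r = vec_any_ge(arg1, arg2)
  end subroutine any_ge_sketch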
diff --git a/flang/test/Lower/PowerPC/ppc-vec-cmp.f90 b/flang/test/Lower/PowerPC/ppc-vec-cmp.f90
new file mode 100644
index 000000000000000..2fbef7a70122fba
--- /dev/null
+++ b/flang/test/Lower/PowerPC/ppc-vec-cmp.f90
@@ -0,0 +1,475 @@
+! RUN: %flang_fc1 -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes="LLVMIR" %s
+! REQUIRES: target=powerpc{{.*}}
+
+!----------------------
+! vec_cmpge
+!----------------------
+
+! CHECK-LABEL: vec_cmpge_test_i8
+subroutine vec_cmpge_test_i8(arg1, arg2)
+  vector(integer(8)) :: arg1, arg2
+  vector(unsigned(8)) :: r
+  r = vec_cmpge(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[res:.*]] = call <2 x i64> @llvm.ppc.altivec.vcmpgtsd(<2 x i64> %[[arg2]], <2 x i64> %[[arg1]])
+! LLVMIR: %{{[0-9]+}} = xor <2 x i64> %[[res]], <i64 -1, i64 -1>
+end subroutine vec_cmpge_test_i8
+
+! CHECK-LABEL: vec_cmpge_test_i4
+subroutine vec_cmpge_test_i4(arg1, arg2)
+  vector(integer(4)) :: arg1, arg2
+  vector(unsigned(4)) :: r
+  r = vec_cmpge(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vcmpgtsw(<4 x i32> %[[arg2]], <4 x i32> %[[arg1]])
+! LLVMIR: %{{[0-9]+}} = xor <4 x i32> %[[res]], <i32 -1, i32 -1, i32 -1, i32 -1>
+end subroutine vec_cmpge_test_i4
+
+! CHECK-LABEL: vec_cmpge_test_i2
+subroutine vec_cmpge_test_i2(arg1, arg2)
+  vector(integer(2)) :: arg1, arg2
+  vector(unsigned(2)) :: r
+  r = vec_cmpge(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[res:.*]] = call <8 x i16> @llvm.ppc.altivec.vcmpgtsh(<8 x i16> %[[arg2]], <8 x i16> %[[arg1]])
+! LLVMIR: %{{[0-9]+}} = xor <8 x i16> %[[res]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+end subroutine vec_cmpge_test_i2
+
+! CHECK-LABEL: vec_cmpge_test_i1
+subroutine vec_cmpge_test_i1(arg1, arg2)
+  vector(integer(1)) :: arg1, arg2
+  vector(unsigned(1)) :: r
+  r = vec_cmpge(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[res:.*]] = call <16 x i8> @llvm.ppc.altivec.vcmpgtsb(<16 x i8> %[[arg2]], <16 x i8> %[[arg1]])
+! LLVMIR: %{{[0-9]+}} = xor <16 x i8> %[[res]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+end subroutine vec_cmpge_test_i1
+
+! CHECK-LABEL: vec_cmpge_test_u8
+subroutine vec_cmpge_test_u8(arg1, arg2)
+  vector(unsigned(8)) :: arg1, arg2
+  vector(unsigned(8)) :: r
+  r = vec_cmpge(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[res:.*]] = call <2 x i64> @llvm.ppc.altivec.vcmpgtud(<2 x i64> %[[arg2]], <2 x i64> %[[arg1]])
+! LLVMIR: %{{[0-9]+}} = xor <2 x i64> %[[res]], <i64 -1, i64 -1>
+end subroutine vec_cmpge_test_u8
+
+! CHECK-LABEL: vec_cmpge_test_u4
+subroutine vec_cmpge_test_u4(arg1, arg2)
+  vector(unsigned(4)) :: arg1, arg2
+  vector(unsigned(4)) :: r
+  r = vec_cmpge(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vcmpgtuw(<4 x i32> %[[arg2]], <4 x i32> %[[arg1]])
+! LLVMIR: %{{[0-9]+}} = xor <4 x i32> %[[res]], <i32 -1, i32 -1, i32 -1, i32 -1>
+end subroutine vec_cmpge_test_u4
+
+! CHECK-LABEL: vec_cmpge_test_u2
+subroutine vec_cmpge_test_u2(arg1, arg2)
+  vector(unsigned(2)) :: arg1, arg2
+  vector(unsigned(2)) :: r
+  r = vec_cmpge(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[res:.*]] = call <8 x i16> @llvm.ppc.altivec.vcmpgtuh(<8 x i16> %[[arg2]], <8 x i16> %[[arg1]])
+! LLVMIR: %{{[0-9]+}} = xor <8 x i16> %[[res]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+end subroutine vec_cmpge_test_u2
+
+! CHECK-LABEL: vec_cmpge_test_u1
+subroutine vec_cmpge_test_u1(arg1, arg2)
+  vector(unsigned(1)) :: arg1, arg2
+  vector(unsigned(1)) :: r
+  r = vec_cmpge(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[res:.*]] = call <16 x i8> @llvm.ppc.altivec.vcmpgtub(<16 x i8> %[[arg2]], <16 x i8> %[[arg1]])
+! LLVMIR: %{{[0-9]+}} = xor <16 x i8> %[[res]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+end subroutine vec_cmpge_test_u1
+
+! CHECK-LABEL: vec_cmpge_test_r4
+subroutine vec_cmpge_test_r4(arg1, arg2)
+  vector(real(4)) :: arg1, arg2
+  vector(unsigned(4)) :: r
+  r = vec_cmpge(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! LLVMIR: %{{[0-9]+}} = call <4 x i32> @llvm.ppc.vsx.xvcmpgesp(<4 x float> %[[arg1]], <4 x float> %[[arg2]])
+end subroutine vec_cmpge_test_r4
+
+! CHECK-LABEL: vec_cmpge_test_r8
+subroutine vec_cmpge_test_r8(arg1, arg2)
+  vector(real(8)) :: arg1, arg2
+  vector(unsigned(8)) :: r
+  r = vec_cmpge(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! LLVMIR: %{{[0-9]+}} = call <2 x i64> @llvm.ppc.vsx.xvcmpgedp(<2 x double> %[[arg1]], <2 x double> %[[arg2]])
+end subroutine vec_cmpge_test_r8
+
+!----------------------
+! vec_cmpgt
+!----------------------
+
+! CHECK-LABEL: vec_cmpgt_test_i1
+subroutine vec_cmpgt_test_i1(arg1, arg2)
+  vector(integer(1)) :: arg1, arg2
+  vector(unsigned(1)) :: r
+  r = vec_cmpgt(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %{{[0-9]+}} = call <16 x i8> @llvm.ppc.altivec.vcmpgtsb(<16 x i8> %[[arg1]], <16 x i8> %[[arg2]])
+end subroutine vec_cmpgt_test_i1
+
+! CHECK-LABEL: vec_cmpgt_test_i2
+subroutine vec_cmpgt_test_i2(arg1, arg2)
+  vector(integer(2)) :: arg1, arg2
+  vector(unsigned(2)) :: r
+  r = vec_cmpgt(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %{{[0-9]+}} = call <8 x i16> @llvm.ppc.altivec.vcmpgtsh(<8 x i16> %[[arg1]], <8 x i16> %[[arg2]])
+end subroutine vec_cmpgt_test_i2
+
+! CHECK-LABEL: vec_cmpgt_test_i4
+subroutine vec_cmpgt_test_i4(arg1, arg2)
+  vector(integer(4)) :: arg1, arg2
+  vector(unsigned(4)) :: r
+  r = vec_cmpgt(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %{{[0-9]+}} = call <4 x i32> @llvm.ppc.altivec.vcmpgtsw(<4 x i32> %[[arg1]], <4 x i32> %[[arg2]])
+end subroutine vec_cmpgt_test_i4
+
+! CHECK-LABEL: vec_cmpgt_test_i8
+subroutine vec_cmpgt_test_i8(arg1, arg2)
+  vector(integer(8)) :: arg1, arg2
+  vector(unsigned(8)) :: r
+  r = vec_cmpgt(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %{{[0-9]+}} = call <2 x i64> @llvm.ppc.altivec.vcmpgtsd(<2 x i64> %[[arg1]], <2 x i64> %[[arg2]])
+end subroutine vec_cmpgt_test_i8
+
+! CHECK-LABEL: vec_cmpgt_test_u1
+subroutine vec_cmpgt_test_u1(arg1, arg2)
+  vector(unsigned(1)) :: arg1, arg2
+  vector(unsigned(1)) :: r
+  r = vec_cmpgt(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %{{[0-9]+}} = call <16 x i8> @llvm.ppc.altivec.vcmpgtub(<16 x i8> %[[arg1]], <16 x i8> %[[arg2]])
+end subroutine vec_cmpgt_test_u1
+
+! CHECK-LABEL: vec_cmpgt_test_u2
+subroutine vec_cmpgt_test_u2(arg1, arg2)
+  vector(unsigned(2)) :: arg1, arg2
+  vector(unsigned(2)) :: r
+  r = vec_cmpgt(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %{{[0-9]+}} = call <8 x i16> @llvm.ppc.altivec.vcmpgtuh(<8 x i16> %[[arg1]], <8 x i16> %[[arg2]])
+end subroutine vec_cmpgt_test_u2
+
+! CHECK-LABEL: vec_cmpgt_test_u4
+subroutine vec_cmpgt_test_u4(arg1, arg2)
+  vector(unsigned(4)) :: arg1, arg2
+  vector(unsigned(4)) :: r
+  r = vec_cmpgt(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %{{[0-9]+}} = call <4 x i32> @llvm.ppc.altivec.vcmpgtuw(<4 x i32> %[[arg1]], <4 x i32> %[[arg2]])
+end subroutine vec_cmpgt_test_u4
+
+! CHECK-LABEL: vec_cmpgt_test_u8
+subroutine vec_cmpgt_test_u8(arg1, arg2)
+  vector(unsigned(8)) :: arg1, arg2
+  vector(unsigned(8)) :: r
+  r = vec_cmpgt(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %{{[0-9]+}} = call <2 x i64> @llvm.ppc.altivec.vcmpgtud(<2 x i64> %[[arg1]], <2 x i64> %[[arg2]])
+end subroutine vec_cmpgt_test_u8
+
+! CHECK-LABEL: vec_cmpgt_test_r4
+subroutine vec_cmpgt_test_r4(arg1, arg2)
+  vector(real(4)) :: arg1, arg2
+  vector(unsigned(4)) :: r
+  r = vec_cmpgt(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! LLVMIR: %{{[0-9]+}} = call <4 x i32> @llvm.ppc.vsx.xvcmpgtsp(<4 x float> %[[arg1]], <4 x float> %[[arg2]])
+end subroutine vec_cmpgt_test_r4
+
+! CHECK-LABEL: vec_cmpgt_test_r8
+subroutine vec_cmpgt_test_r8(arg1, arg2)
+  vector(real(8)) :: arg1, arg2
+  vector(unsigned(8)) :: r
+  r = vec_cmpgt(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! LLVMIR: %{{[0-9]+}} = call <2 x i64> @llvm.ppc.vsx.xvcmpgtdp(<2 x double> %[[arg1]], <2 x double> %[[arg2]])
+end subroutine vec_cmpgt_test_r8
+
+!----------------------
+! vec_cmple
+!----------------------
+
+! CHECK-LABEL: vec_cmple_test_i8
+subroutine vec_cmple_test_i8(arg1, arg2)
+  vector(integer(8)) :: arg1, arg2
+  vector(unsigned(8)) :: r
+  r = vec_cmple(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[res:.*]] = call <2 x i64> @llvm.ppc.altivec.vcmpgtsd(<2 x i64> %[[arg1]], <2 x i64> %[[arg2]])
+! LLVMIR: %{{[0-9]+}} = xor <2 x i64> %[[res]], <i64 -1, i64 -1>
+end subroutine vec_cmple_test_i8
+
+! CHECK-LABEL: vec_cmple_test_i4
+subroutine vec_cmple_test_i4(arg1, arg2)
+  vector(integer(4)) :: arg1, arg2
+  vector(unsigned(4)) :: r
+  r = vec_cmple(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vcmpgtsw(<4 x i32> %[[arg1]], <4 x i32> %[[arg2]])
+! LLVMIR: %{{[0-9]+}} = xor <4 x i32> %[[res]], <i32 -1, i32 -1, i32 -1, i32 -1>
+end subroutine vec_cmple_test_i4
+
+! CHECK-LABEL: vec_cmple_test_i2
+subroutine vec_cmple_test_i2(arg1, arg2)
+  vector(integer(2)) :: arg1, arg2
+  vector(unsigned(2)) :: r
+  r = vec_cmple(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[res:.*]] = call <8 x i16> @llvm.ppc.altivec.vcmpgtsh(<8 x i16> %[[arg1]], <8 x i16> %[[arg2]])
+! LLVMIR: %{{[0-9]+}} = xor <8 x i16> %[[res]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+end subroutine vec_cmple_test_i2
+
+! CHECK-LABEL: vec_cmple_test_i1
+subroutine vec_cmple_test_i1(arg1, arg2)
+  vector(integer(1)) :: arg1, arg2
+  vector(unsigned(1)) :: r
+  r = vec_cmple(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[res:.*]] = call <16 x i8> @llvm.ppc.altivec.vcmpgtsb(<16 x i8> %[[arg1]], <16 x i8> %[[arg2]])
+! LLVMIR: %{{[0-9]+}} = xor <16 x i8> %[[res]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+end subroutine vec_cmple_test_i1
+
+! CHECK-LABEL: vec_cmple_test_u8
+subroutine vec_cmple_test_u8(arg1, arg2)
+  vector(unsigned(8)) :: arg1, arg2
+  vector(unsigned(8)) :: r
+  r = vec_cmple(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[res:.*]] = call <2 x i64> @llvm.ppc.altivec.vcmpgtud(<2 x i64> %[[arg1]], <2 x i64> %[[arg2]])
+! LLVMIR: %{{[0-9]+}} = xor <2 x i64> %[[res]], <i64 -1, i64 -1>
+end subroutine vec_cmple_test_u8
+
+! CHECK-LABEL: vec_cmple_test_u4
+subroutine vec_cmple_test_u4(arg1, arg2)
+  vector(unsigned(4)) :: arg1, arg2
+  vector(unsigned(4)) :: r
+  r = vec_cmple(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vcmpgtuw(<4 x i32> %[[arg1]], <4 x i32> %[[arg2]])
+! LLVMIR: %{{[0-9]+}} = xor <4 x i32> %[[res]], <i32 -1, i32 -1, i32 -1, i32 -1>
+end subroutine vec_cmple_test_u4
+
+! CHECK-LABEL: vec_cmple_test_u2
+subroutine vec_cmple_test_u2(arg1, arg2)
+  vector(unsigned(2)) :: arg1, arg2
+  vector(unsigned(2)) :: r
+  r = vec_cmple(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[res:.*]] = call <8 x i16> @llvm.ppc.altivec.vcmpgtuh(<8 x i16> %[[arg1]], <8 x i16> %[[arg2]])
+! LLVMIR: %{{[0-9]+}} = xor <8 x i16> %[[res]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+end subroutine vec_cmple_test_u2
+
+! CHECK-LABEL: vec_cmple_test_u1
+subroutine vec_cmple_test_u1(arg1, arg2)
+  vector(unsigned(1)) :: arg1, arg2
+  vector(unsigned(1)) :: r
+  r = vec_cmple(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[res:.*]] = call <16 x i8> @llvm.ppc.altivec.vcmpgtub(<16 x i8> %[[arg1]], <16 x i8> %[[arg2]])
+! LLVMIR: %{{[0-9]+}} = xor <16 x i8> %[[res]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+end subroutine vec_cmple_test_u1
+
+! CHECK-LABEL: vec_cmple_test_r4
+subroutine vec_cmple_test_r4(arg1, arg2)
+  vector(real(4)) :: arg1, arg2
+  vector(unsigned(4)) :: r
+  r = vec_cmple(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! LLVMIR: %{{[0-9]+}} = call <4 x i32> @llvm.ppc.vsx.xvcmpgesp(<4 x float> %[[arg2]], <4 x float> %[[arg1]])
+end subroutine vec_cmple_test_r4
+
+! CHECK-LABEL: vec_cmple_test_r8
+subroutine vec_cmple_test_r8(arg1, arg2)
+  vector(real(8)) :: arg1, arg2
+  vector(unsigned(8)) :: r
+  r = vec_cmple(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! LLVMIR: %{{[0-9]+}} = call <2 x i64> @llvm.ppc.vsx.xvcmpgedp(<2 x double> %[[arg2]], <2 x double> %[[arg1]])
+end subroutine vec_cmple_test_r8
+
+!----------------------
+! vec_cmplt
+!----------------------
+
+! CHECK-LABEL: vec_cmplt_test_i1
+subroutine vec_cmplt_test_i1(arg1, arg2)
+  vector(integer(1)) :: arg1, arg2
+  vector(unsigned(1)) :: r
+  r = vec_cmplt(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %{{[0-9]+}} = call <16 x i8> @llvm.ppc.altivec.vcmpgtsb(<16 x i8> %[[arg2]], <16 x i8> %[[arg1]])
+end subroutine vec_cmplt_test_i1
+
+! CHECK-LABEL: vec_cmplt_test_i2
+subroutine vec_cmplt_test_i2(arg1, arg2)
+  vector(integer(2)) :: arg1, arg2
+  vector(unsigned(2)) :: r
+  r = vec_cmplt(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %{{[0-9]+}} = call <8 x i16> @llvm.ppc.altivec.vcmpgtsh(<8 x i16> %[[arg2]], <8 x i16> %[[arg1]])
+end subroutine vec_cmplt_test_i2
+
+! CHECK-LABEL: vec_cmplt_test_i4
+subroutine vec_cmplt_test_i4(arg1, arg2)
+  vector(integer(4)) :: arg1, arg2
+  vector(unsigned(4)) :: r
+  r = vec_cmplt(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %{{[0-9]+}} = call <4 x i32> @llvm.ppc.altivec.vcmpgtsw(<4 x i32> %[[arg2]], <4 x i32> %[[arg1]])
+end subroutine vec_cmplt_test_i4
+
+! CHECK-LABEL: vec_cmplt_test_i8
+subroutine vec_cmplt_test_i8(arg1, arg2)
+  vector(integer(8)) :: arg1, arg2
+  vector(unsigned(8)) :: r
+  r = vec_cmplt(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %{{[0-9]+}} = call <2 x i64> @llvm.ppc.altivec.vcmpgtsd(<2 x i64> %[[arg2]], <2 x i64> %[[arg1]])
+end subroutine vec_cmplt_test_i8
+
+! CHECK-LABEL: vec_cmplt_test_u1
+subroutine vec_cmplt_test_u1(arg1, arg2)
+  vector(unsigned(1)) :: arg1, arg2
+  vector(unsigned(1)) :: r
+  r = vec_cmplt(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %{{[0-9]+}} = call <16 x i8> @llvm.ppc.altivec.vcmpgtub(<16 x i8> %[[arg2]], <16 x i8> %[[arg1]])
+end subroutine vec_cmplt_test_u1
+
+! CHECK-LABEL: vec_cmplt_test_u2
+subroutine vec_cmplt_test_u2(arg1, arg2)
+  vector(unsigned(2)) :: arg1, arg2
+  vector(unsigned(2)) :: r
+  r = vec_cmplt(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %{{[0-9]+}} = call <8 x i16> @llvm.ppc.altivec.vcmpgtuh(<8 x i16> %[[arg2]], <8 x i16> %[[arg1]])
+end subroutine vec_cmplt_test_u2
+
+! CHECK-LABEL: vec_cmplt_test_u4
+subroutine vec_cmplt_test_u4(arg1, arg2)
+  vector(unsigned(4)) :: arg1, arg2
+  vector(unsigned(4)) :: r
+  r = vec_cmplt(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %{{[0-9]+}} = call <4 x i32> @llvm.ppc.altivec.vcmpgtuw(<4 x i32> %[[arg2]], <4 x i32> %[[arg1]])
+end subroutine vec_cmplt_test_u4
+
+! CHECK-LABEL: vec_cmplt_test_u8
+subroutine vec_cmplt_test_u8(arg1, arg2)
+  vector(unsigned(8)) :: arg1, arg2
+  vector(unsigned(8)) :: r
+  r = vec_cmplt(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %{{[0-9]+}} = call <2 x i64> @llvm.ppc.altivec.vcmpgtud(<2 x i64> %[[arg2]], <2 x i64> %[[arg1]])
+end subroutine vec_cmplt_test_u8
+
+! CHECK-LABEL: vec_cmplt_test_r4
+subroutine vec_cmplt_test_r4(arg1, arg2)
+  vector(real(4)) :: arg1, arg2
+  vector(unsigned(4)) :: r
+  r = vec_cmplt(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! LLVMIR: %{{[0-9]+}} = call <4 x i32> @llvm.ppc.vsx.xvcmpgtsp(<4 x float> %[[arg2]], <4 x float> %[[arg1]])
+end subroutine vec_cmplt_test_r4
+
+! CHECK-LABEL: vec_cmplt_test_r8
+subroutine vec_cmplt_test_r8(arg1, arg2)
+  vector(real(8)) :: arg1, arg2
+  vector(unsigned(8)) :: r
+  r = vec_cmplt(arg1, arg2)
+
+! LLVMIR: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! LLVMIR: %{{[0-9]+}} = call <2 x i64> @llvm.ppc.vsx.xvcmpgtdp(<2 x double> %[[arg2]], <2 x double> %[[arg1]])
+end subroutine vec_cmplt_test_r8
+

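A note on the integer vec_cmpge/vec_cmple checks above: AltiVec has no greater-or-equal compare for integer elements, so vec_cmpge(arg1, arg2) is lowered as vcmpgt(arg2, arg1) followed by an xor with an all-ones vector (a bitwise NOT of the mask), and vec_cmple(arg1, arg2) likewise as NOT(vcmpgt(arg1, arg2)); the real types call the VSX xvcmpge instructions directly, with operands swapped for vec_cmple. A minimal sketch under those assumptions (the subroutine name is mine, not from the patch):

  ! Hypothetical example (not part of this patch): arg1 >= arg2 computed
  ! as "not (arg2 > arg1)", matching the vcmpgtsw + xor <i32 -1, ...>
  ! sequence the LLVMIR checks above verify.
  subroutine cmpge_sketch(arg1, arg2, m)
    vector(integer(4)), intent(in) :: arg1, arg2
    vector(unsigned(4)), intent(out) :: m
    m = vec_cmpge(arg1, arg2)
  end subroutine cmpge_sketch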
diff --git a/flang/test/Lower/PowerPC/ppc-vec-convert.f90 b/flang/test/Lower/PowerPC/ppc-vec-convert.f90
index 587d7fc17e6af06..14e247f83df67a9 100644
--- a/flang/test/Lower/PowerPC/ppc-vec-convert.f90
+++ b/flang/test/Lower/PowerPC/ppc-vec-convert.f90
@@ -1,6 +1,4 @@
-! RUN: bbc -emit-fir %s -o - | FileCheck --check-prefixes="CHECK-FIR" %s
-! RUN: %flang_fc1 -emit-fir %s -o - | fir-opt --fir-to-llvm-ir | FileCheck --check-prefixes="CHECK-LLVMIR" %s
-! RUN: %flang_fc1 -emit-llvm %s -o - | FileCheck --check-prefixes="CHECK" %s
+! RUN: %flang_fc1 -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes="LLVMIR" %s
 ! REQUIRES: target=powerpc{{.*}}
 
 !---------
@@ -12,21 +10,9 @@ subroutine vec_ctf_test_i4i1(arg1)
   vector(real(4)) :: r
   r = vec_ctf(arg1, 1_1)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[arg2:.*]] = arith.constant 1 : i8
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (i8) -> i32
-! CHECK-FIR: %[[r:.*]] = fir.call @llvm.ppc.altivec.vcfsx(%[[arg1]], %[[carg2]]) fastmath<contract> : (!fir.vector<4:i32>, i32) -> !fir.vector<4:f32>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.mlir.constant(1 : i8) : i8
-! CHECK-LLVMIR: %[[carg2:.*]] = llvm.sext %[[arg2]] : i8 to i32
-! CHECK-LLVMIR: %[[r:.*]] = llvm.call @llvm.ppc.altivec.vcfsx(%[[arg1]], %[[carg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, i32) -> vector<4xf32>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<4xf32>>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[r:.*]] = call contract <4 x float> @llvm.ppc.altivec.vcfsx(<4 x i32> %[[arg1]], i32 1)
-! CHECK: store <4 x float> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[r:.*]] = call contract <4 x float> @llvm.ppc.altivec.vcfsx(<4 x i32> %[[arg1]], i32 1)
+! LLVMIR: store <4 x float> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_ctf_test_i4i1
 
 ! CHECK-LABEL: vec_ctf_test_i4i2
@@ -35,21 +21,9 @@ subroutine vec_ctf_test_i4i2(arg1)
   vector(real(4)) :: r
   r = vec_ctf(arg1, 1_2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[arg2:.*]] = arith.constant 1 : i16
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (i16) -> i32
-! CHECK-FIR: %[[r:.*]] = fir.call @llvm.ppc.altivec.vcfsx(%[[arg1]], %[[carg2]]) fastmath<contract> : (!fir.vector<4:i32>, i32) -> !fir.vector<4:f32>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.mlir.constant(1 : i16) : i16
-! CHECK-LLVMIR: %[[carg2:.*]] = llvm.sext %[[arg2]] : i16 to i32
-! CHECK-LLVMIR: %[[r:.*]] = llvm.call @llvm.ppc.altivec.vcfsx(%[[arg1]], %[[carg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, i32) -> vector<4xf32>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<4xf32>>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[r:.*]] = call contract <4 x float> @llvm.ppc.altivec.vcfsx(<4 x i32> %[[arg1]], i32 1)
-! CHECK: store <4 x float> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[r:.*]] = call contract <4 x float> @llvm.ppc.altivec.vcfsx(<4 x i32> %[[arg1]], i32 1)
+! LLVMIR: store <4 x float> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_ctf_test_i4i2
 
 ! CHECK-LABEL: vec_ctf_test_i4i4
@@ -58,19 +32,9 @@ subroutine vec_ctf_test_i4i4(arg1)
   vector(real(4)) :: r
   r = vec_ctf(arg1, 1_4)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[arg2:.*]] = arith.constant 1 : i32
-! CHECK-FIR: %[[r:.*]] = fir.call @llvm.ppc.altivec.vcfsx(%[[arg1]], %[[arg2]]) fastmath<contract> : (!fir.vector<4:i32>, i32) -> !fir.vector<4:f32>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.mlir.constant(1 : i32) : i32
-! CHECK-LLVMIR: %[[r:.*]] = llvm.call @llvm.ppc.altivec.vcfsx(%[[arg1]], %[[arg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, i32) -> vector<4xf32>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<4xf32>>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[r:.*]] = call contract <4 x float> @llvm.ppc.altivec.vcfsx(<4 x i32> %[[arg1]], i32 1)
-! CHECK: store <4 x float> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[r:.*]] = call contract <4 x float> @llvm.ppc.altivec.vcfsx(<4 x i32> %[[arg1]], i32 1)
+! LLVMIR: store <4 x float> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_ctf_test_i4i4
 
 ! CHECK-LABEL: vec_ctf_test_i4i8
@@ -79,21 +43,9 @@ subroutine vec_ctf_test_i4i8(arg1)
   vector(real(4)) :: r
   r = vec_ctf(arg1, 1_8)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[arg2:.*]] = arith.constant 1 : i64
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (i64) -> i32
-! CHECK-FIR: %[[r:.*]] = fir.call @llvm.ppc.altivec.vcfsx(%[[arg1]], %[[carg2]]) fastmath<contract> : (!fir.vector<4:i32>, i32) -> !fir.vector<4:f32>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.mlir.constant(1 : i64) : i64
-! CHECK-LLVMIR: %[[carg2:.*]] = llvm.trunc %[[arg2]] : i64 to i32
-! CHECK-LLVMIR: %[[r:.*]] = llvm.call @llvm.ppc.altivec.vcfsx(%[[arg1]], %[[carg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, i32) -> vector<4xf32>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<4xf32>>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[r:.*]] = call contract <4 x float> @llvm.ppc.altivec.vcfsx(<4 x i32> %[[arg1]], i32 1)
-! CHECK: store <4 x float> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[r:.*]] = call contract <4 x float> @llvm.ppc.altivec.vcfsx(<4 x i32> %[[arg1]], i32 1)
+! LLVMIR: store <4 x float> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_ctf_test_i4i8
 
 ! CHECK-LABEL: vec_ctf_test_i8i1
@@ -102,24 +54,10 @@ subroutine vec_ctf_test_i8i1(arg1)
   vector(real(8)) :: r
   r = vec_ctf(arg1, 3_1)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[varg:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! CHECK-FIR: %[[carg:.*]] = llvm.sitofp %[[varg]] : vector<2xi64> to vector<2xf64>
-! CHECK-FIR: %[[cst:.*]] = arith.constant dense<1.250000e-01> : vector<2xf64>
-! CHECK-FIR: %[[r:.*]] = llvm.fmul %[[carg]], %[[cst]]  : vector<2xf64>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<2xf64>) -> !fir.vector<2:f64>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[carg:.*]] = llvm.sitofp %[[arg1]] : vector<2xi64> to vector<2xf64>
-! CHECK-LLVMIR: %[[cst:.*]] = llvm.mlir.constant(dense<1.250000e-01> : vector<2xf64>) : vector<2xf64>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.fmul %[[carg]], %[[cst]]  : vector<2xf64>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<2xf64>>
-
-! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[carg:.*]] = sitofp <2 x i64> %[[arg1]] to <2 x double>
-! CHECK: %[[r:.*]] = fmul <2 x double> %[[carg]], <double 1.250000e-01, double 1.250000e-01>
-! CHECK: store <2 x double> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[carg:.*]] = sitofp <2 x i64> %[[arg1]] to <2 x double>
+! LLVMIR: %[[r:.*]] = fmul <2 x double> %[[carg]], <double 1.250000e-01, double 1.250000e-01>
+! LLVMIR: store <2 x double> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_ctf_test_i8i1
 
 ! CHECK-LABEL: vec_ctf_test_i8i2
@@ -128,24 +66,10 @@ subroutine vec_ctf_test_i8i2(arg1)
   vector(real(8)) :: r
   r = vec_ctf(arg1, 3_2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[varg:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! CHECK-FIR: %[[carg:.*]] = llvm.sitofp %[[varg]] : vector<2xi64> to vector<2xf64>
-! CHECK-FIR: %[[cst:.*]] = arith.constant dense<1.250000e-01> : vector<2xf64>
-! CHECK-FIR: %[[r:.*]] = llvm.fmul %[[carg]], %[[cst]]  : vector<2xf64>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<2xf64>) -> !fir.vector<2:f64>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[carg:.*]] = llvm.sitofp %[[arg1]] : vector<2xi64> to vector<2xf64>
-! CHECK-LLVMIR: %[[cst:.*]] = llvm.mlir.constant(dense<1.250000e-01> : vector<2xf64>) : vector<2xf64>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.fmul %[[carg]], %[[cst]]  : vector<2xf64>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<2xf64>>
-
-! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[carg:.*]] = sitofp <2 x i64> %[[arg1]] to <2 x double>
-! CHECK: %[[r:.*]] = fmul <2 x double> %[[carg]], <double 1.250000e-01, double 1.250000e-01>
-! CHECK: store <2 x double> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[carg:.*]] = sitofp <2 x i64> %[[arg1]] to <2 x double>
+! LLVMIR: %[[r:.*]] = fmul <2 x double> %[[carg]], <double 1.250000e-01, double 1.250000e-01>
+! LLVMIR: store <2 x double> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_ctf_test_i8i2
 
 ! CHECK-LABEL: vec_ctf_test_i8i4
@@ -154,24 +78,10 @@ subroutine vec_ctf_test_i8i4(arg1)
   vector(real(8)) :: r
   r = vec_ctf(arg1, 3_4)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[varg:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! CHECK-FIR: %[[carg:.*]] = llvm.sitofp %[[varg]] : vector<2xi64> to vector<2xf64>
-! CHECK-FIR: %[[cst:.*]] = arith.constant dense<1.250000e-01> : vector<2xf64>
-! CHECK-FIR: %[[r:.*]] = llvm.fmul %[[carg]], %[[cst]]  : vector<2xf64>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<2xf64>) -> !fir.vector<2:f64>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[carg:.*]] = llvm.sitofp %[[arg1]] : vector<2xi64> to vector<2xf64>
-! CHECK-LLVMIR: %[[cst:.*]] = llvm.mlir.constant(dense<1.250000e-01> : vector<2xf64>) : vector<2xf64>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.fmul %[[carg]], %[[cst]]  : vector<2xf64>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<2xf64>>
-
-! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[carg:.*]] = sitofp <2 x i64> %[[arg1]] to <2 x double>
-! CHECK: %[[r:.*]] = fmul <2 x double> %[[carg]], <double 1.250000e-01, double 1.250000e-01>
-! CHECK: store <2 x double> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[carg:.*]] = sitofp <2 x i64> %[[arg1]] to <2 x double>
+! LLVMIR: %[[r:.*]] = fmul <2 x double> %[[carg]], <double 1.250000e-01, double 1.250000e-01>
+! LLVMIR: store <2 x double> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_ctf_test_i8i4
 
 ! CHECK-LABEL: vec_ctf_test_i8i8
@@ -180,24 +90,10 @@ subroutine vec_ctf_test_i8i8(arg1)
   vector(real(8)) :: r
   r = vec_ctf(arg1, 3_8)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[varg:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! CHECK-FIR: %[[carg:.*]] = llvm.sitofp %[[varg]] : vector<2xi64> to vector<2xf64>
-! CHECK-FIR: %[[cst:.*]] = arith.constant dense<1.250000e-01> : vector<2xf64>
-! CHECK-FIR: %[[r:.*]] = llvm.fmul %[[carg]], %[[cst]]  : vector<2xf64>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<2xf64>) -> !fir.vector<2:f64>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[carg:.*]] = llvm.sitofp %[[arg1]] : vector<2xi64> to vector<2xf64>
-! CHECK-LLVMIR: %[[cst:.*]] = llvm.mlir.constant(dense<1.250000e-01> : vector<2xf64>) : vector<2xf64>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.fmul %[[carg]], %[[cst]]  : vector<2xf64>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<2xf64>>
-
-! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[carg:.*]] = sitofp <2 x i64> %[[arg1]] to <2 x double>
-! CHECK: %[[r:.*]] = fmul <2 x double> %[[carg]], <double 1.250000e-01, double 1.250000e-01>
-! CHECK: store <2 x double> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[carg:.*]] = sitofp <2 x i64> %[[arg1]] to <2 x double>
+! LLVMIR: %[[r:.*]] = fmul <2 x double> %[[carg]], <double 1.250000e-01, double 1.250000e-01>
+! LLVMIR: store <2 x double> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_ctf_test_i8i8
 
 ! CHECK-LABEL: vec_ctf_test_u4i1
@@ -206,21 +102,9 @@ subroutine vec_ctf_test_u4i1(arg1)
   vector(real(4)) :: r
   r = vec_ctf(arg1, 1_1)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[arg2:.*]] = arith.constant 1 : i8
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (i8) -> i32
-! CHECK-FIR: %[[r:.*]] = fir.call @llvm.ppc.altivec.vcfux(%[[arg1]], %[[carg2]]) fastmath<contract> : (!fir.vector<4:ui32>, i32) -> !fir.vector<4:f32>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.mlir.constant(1 : i8) : i8
-! CHECK-LLVMIR: %[[carg2:.*]] = llvm.sext %[[arg2]] : i8 to i32
-! CHECK-LLVMIR: %[[r:.*]] = llvm.call @llvm.ppc.altivec.vcfux(%[[arg1]], %[[carg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, i32) -> vector<4xf32>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<4xf32>>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[r:.*]] = call contract <4 x float> @llvm.ppc.altivec.vcfux(<4 x i32> %[[arg1]], i32 1)
-! CHECK: store <4 x float> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[r:.*]] = call contract <4 x float> @llvm.ppc.altivec.vcfux(<4 x i32> %[[arg1]], i32 1)
+! LLVMIR: store <4 x float> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_ctf_test_u4i1
 
 ! CHECK-LABEL: vec_ctf_test_u4i2
@@ -229,21 +113,9 @@ subroutine vec_ctf_test_u4i2(arg1)
   vector(real(4)) :: r
   r = vec_ctf(arg1, 1_2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[arg2:.*]] = arith.constant 1 : i16
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (i16) -> i32
-! CHECK-FIR: %[[r:.*]] = fir.call @llvm.ppc.altivec.vcfux(%[[arg1]], %[[carg2]]) fastmath<contract> : (!fir.vector<4:ui32>, i32) -> !fir.vector<4:f32>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.mlir.constant(1 : i16) : i16
-! CHECK-LLVMIR: %[[carg2:.*]] = llvm.sext %[[arg2]] : i16 to i32
-! CHECK-LLVMIR: %[[r:.*]] = llvm.call @llvm.ppc.altivec.vcfux(%[[arg1]], %[[carg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, i32) -> vector<4xf32>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<4xf32>>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[r:.*]] = call contract <4 x float> @llvm.ppc.altivec.vcfux(<4 x i32> %[[arg1]], i32 1)
-! CHECK: store <4 x float> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[r:.*]] = call contract <4 x float> @llvm.ppc.altivec.vcfux(<4 x i32> %[[arg1]], i32 1)
+! LLVMIR: store <4 x float> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_ctf_test_u4i2
 
 ! CHECK-LABEL: vec_ctf_test_u4i4
@@ -252,19 +124,9 @@ subroutine vec_ctf_test_u4i4(arg1)
   vector(real(4)) :: r
   r = vec_ctf(arg1, 1_4)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[arg2:.*]] = arith.constant 1 : i32
-! CHECK-FIR: %[[r:.*]] = fir.call @llvm.ppc.altivec.vcfux(%[[arg1]], %[[arg2]]) fastmath<contract> : (!fir.vector<4:ui32>, i32) -> !fir.vector<4:f32>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.mlir.constant(1 : i32) : i32
-! CHECK-LLVMIR: %[[r:.*]] = llvm.call @llvm.ppc.altivec.vcfux(%[[arg1]], %[[arg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, i32) -> vector<4xf32>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<4xf32>>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[r:.*]] = call contract <4 x float> @llvm.ppc.altivec.vcfux(<4 x i32> %[[arg1]], i32 1)
-! CHECK: store <4 x float> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[r:.*]] = call contract <4 x float> @llvm.ppc.altivec.vcfux(<4 x i32> %[[arg1]], i32 1)
+! LLVMIR: store <4 x float> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_ctf_test_u4i4
 
 ! CHECK-LABEL: vec_ctf_test_u4i8
@@ -273,21 +135,9 @@ subroutine vec_ctf_test_u4i8(arg1)
   vector(real(4)) :: r
   r = vec_ctf(arg1, 1_8)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[arg2:.*]] = arith.constant 1 : i64
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (i64) -> i32
-! CHECK-FIR: %[[r:.*]] = fir.call @llvm.ppc.altivec.vcfux(%[[arg1]], %[[carg2]]) fastmath<contract> : (!fir.vector<4:ui32>, i32) -> !fir.vector<4:f32>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.mlir.constant(1 : i64) : i64
-! CHECK-LLVMIR: %[[carg2:.*]] = llvm.trunc %[[arg2]] : i64 to i32
-! CHECK-LLVMIR: %[[r:.*]] = llvm.call @llvm.ppc.altivec.vcfux(%[[arg1]], %[[carg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, i32) -> vector<4xf32>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<4xf32>>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[r:.*]] = call contract <4 x float> @llvm.ppc.altivec.vcfux(<4 x i32> %[[arg1]], i32 1)
-! CHECK: store <4 x float> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[r:.*]] = call contract <4 x float> @llvm.ppc.altivec.vcfux(<4 x i32> %[[arg1]], i32 1)
+! LLVMIR: store <4 x float> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_ctf_test_u4i8
 
 ! CHECK-LABEL: vec_ctf_test_u8i1
@@ -296,24 +146,10 @@ subroutine vec_ctf_test_u8i1(arg1)
   vector(real(8)) :: r
   r = vec_ctf(arg1, 3_1)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[varg:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %[[carg:.*]] = llvm.uitofp %[[varg]] : vector<2xi64> to vector<2xf64>
-! CHECK-FIR: %[[cst:.*]] = arith.constant dense<1.250000e-01> : vector<2xf64>
-! CHECK-FIR: %[[r:.*]] = llvm.fmul %[[carg]], %[[cst]]  : vector<2xf64>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<2xf64>) -> !fir.vector<2:f64>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[carg:.*]] = llvm.uitofp %[[arg1]] : vector<2xi64> to vector<2xf64>
-! CHECK-LLVMIR: %[[cst:.*]] = llvm.mlir.constant(dense<1.250000e-01> : vector<2xf64>) : vector<2xf64>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.fmul %[[carg]], %[[cst]]  : vector<2xf64>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<2xf64>>
-
-! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[carg:.*]] = uitofp <2 x i64> %[[arg1]] to <2 x double>
-! CHECK: %[[r:.*]] = fmul <2 x double> %[[carg]], <double 1.250000e-01, double 1.250000e-01>
-! CHECK: store <2 x double> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[carg:.*]] = uitofp <2 x i64> %[[arg1]] to <2 x double>
+! LLVMIR: %[[r:.*]] = fmul <2 x double> %[[carg]], <double 1.250000e-01, double 1.250000e-01>
+! LLVMIR: store <2 x double> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_ctf_test_u8i1
 
 ! CHECK-LABEL: vec_ctf_test_u8i2
@@ -322,24 +158,10 @@ subroutine vec_ctf_test_u8i2(arg1)
   vector(real(8)) :: r
   r = vec_ctf(arg1, 3_2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[varg:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %[[carg:.*]] = llvm.uitofp %[[varg]] : vector<2xi64> to vector<2xf64>
-! CHECK-FIR: %[[cst:.*]] = arith.constant dense<1.250000e-01> : vector<2xf64>
-! CHECK-FIR: %[[r:.*]] = llvm.fmul %[[carg]], %[[cst]]  : vector<2xf64>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<2xf64>) -> !fir.vector<2:f64>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[carg:.*]] = llvm.uitofp %[[arg1]] : vector<2xi64> to vector<2xf64>
-! CHECK-LLVMIR: %[[cst:.*]] = llvm.mlir.constant(dense<1.250000e-01> : vector<2xf64>) : vector<2xf64>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.fmul %[[carg]], %[[cst]]  : vector<2xf64>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<2xf64>>
-
-! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[carg:.*]] = uitofp <2 x i64> %[[arg1]] to <2 x double>
-! CHECK: %[[r:.*]] = fmul <2 x double> %[[carg]], <double 1.250000e-01, double 1.250000e-01>
-! CHECK: store <2 x double> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[carg:.*]] = uitofp <2 x i64> %[[arg1]] to <2 x double>
+! LLVMIR: %[[r:.*]] = fmul <2 x double> %[[carg]], <double 1.250000e-01, double 1.250000e-01>
+! LLVMIR: store <2 x double> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_ctf_test_u8i2
 
 ! CHECK-LABEL: vec_ctf_test_u8i4
@@ -348,24 +170,10 @@ subroutine vec_ctf_test_u8i4(arg1)
   vector(real(8)) :: r
   r = vec_ctf(arg1, 3_4)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[varg:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %[[carg:.*]] = llvm.uitofp %[[varg]] : vector<2xi64> to vector<2xf64>
-! CHECK-FIR: %[[cst:.*]] = arith.constant dense<1.250000e-01> : vector<2xf64>
-! CHECK-FIR: %[[r:.*]] = llvm.fmul %[[carg]], %[[cst]]  : vector<2xf64>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<2xf64>) -> !fir.vector<2:f64>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[carg:.*]] = llvm.uitofp %[[arg1]] : vector<2xi64> to vector<2xf64>
-! CHECK-LLVMIR: %[[cst:.*]] = llvm.mlir.constant(dense<1.250000e-01> : vector<2xf64>) : vector<2xf64>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.fmul %[[carg]], %[[cst]]  : vector<2xf64>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<2xf64>>
-
-! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[carg:.*]] = uitofp <2 x i64> %[[arg1]] to <2 x double>
-! CHECK: %[[r:.*]] = fmul <2 x double> %[[carg]], <double 1.250000e-01, double 1.250000e-01>
-! CHECK: store <2 x double> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[carg:.*]] = uitofp <2 x i64> %[[arg1]] to <2 x double>
+! LLVMIR: %[[r:.*]] = fmul <2 x double> %[[carg]], <double 1.250000e-01, double 1.250000e-01>
+! LLVMIR: store <2 x double> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_ctf_test_u8i4
 
 ! CHECK-LABEL: vec_ctf_test_u8i8
@@ -374,24 +182,10 @@ subroutine vec_ctf_test_u8i8(arg1)
   vector(real(8)) :: r
   r = vec_ctf(arg1, 3_8)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[varg:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %[[carg:.*]] = llvm.uitofp %[[varg]] : vector<2xi64> to vector<2xf64>
-! CHECK-FIR: %[[cst:.*]] = arith.constant dense<1.250000e-01> : vector<2xf64>
-! CHECK-FIR: %[[r:.*]] = llvm.fmul %[[carg]], %[[cst]]  : vector<2xf64>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<2xf64>) -> !fir.vector<2:f64>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[carg:.*]] = llvm.uitofp %[[arg1]] : vector<2xi64> to vector<2xf64>
-! CHECK-LLVMIR: %[[cst:.*]] = llvm.mlir.constant(dense<1.250000e-01> : vector<2xf64>) : vector<2xf64>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.fmul %[[carg]], %[[cst]]  : vector<2xf64>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<2xf64>>
-
-! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[carg:.*]] = uitofp <2 x i64> %[[arg1]] to <2 x double>
-! CHECK: %[[r:.*]] = fmul <2 x double> %[[carg]], <double 1.250000e-01, double 1.250000e-01>
-! CHECK: store <2 x double> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[carg:.*]] = uitofp <2 x i64> %[[arg1]] to <2 x double>
+! LLVMIR: %[[r:.*]] = fmul <2 x double> %[[carg]], <double 1.250000e-01, double 1.250000e-01>
+! LLVMIR: store <2 x double> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_ctf_test_u8i8
 
 !-------------
@@ -403,17 +197,8 @@ subroutine vec_convert_test_i1i1(v, mold)
   vector(integer(1)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<16xi8> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<16xi8>) -> !fir.vector<16:i8>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: llvm.store %[[v]], %{{.*}} : !llvm.ptr<vector<16xi8>>
-
-! CHECK: %[[v:.*]] = load <16 x i8>, ptr %0, align 16
-! CHECK: store <16 x i8> %[[v]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <16 x i8>, ptr %0, align 16
+! LLVMIR: store <16 x i8> %[[v]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_i1i1
 
 ! CHECK-LABEL: vec_convert_test_i1i2
@@ -422,19 +207,9 @@ subroutine vec_convert_test_i1i2(v, mold)
   vector(integer(2)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<16xi8> to vector<8xi16>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<8xi16>) -> !fir.vector<8:i16>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<16xi8> to vector<8xi16>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<8xi16>>
-
-! CHECK: %[[v:.*]] = load <16 x i8>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <16 x i8> %[[v]] to <8 x i16>
-! CHECK: store <8 x i16> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <16 x i8>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <16 x i8> %[[v]] to <8 x i16>
+! LLVMIR: store <8 x i16> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_i1i2
 
 ! CHECK-LABEL: vec_convert_test_i1i4
@@ -443,19 +218,9 @@ subroutine vec_convert_test_i1i4(v, mold)
   vector(integer(4)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<16xi8> to vector<4xi32>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<4xi32>>
-
-! CHECK: %[[v:.*]] = load <16 x i8>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <16 x i8> %[[v]] to <4 x i32>
-! CHECK: store <4 x i32> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <16 x i8>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <16 x i8> %[[v]] to <4 x i32>
+! LLVMIR: store <4 x i32> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_i1i4
 
 ! CHECK-LABEL: vec_convert_test_i1i8
@@ -464,19 +229,9 @@ subroutine vec_convert_test_i1i8(v, mold)
   vector(integer(8)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<16xi8> to vector<2xi64>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<2xi64>) -> !fir.vector<2:i64>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<16xi8> to vector<2xi64>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<2xi64>>
-
-! CHECK: %[[v:.*]] = load <16 x i8>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <16 x i8> %[[v]] to <2 x i64>
-! CHECK: store <2 x i64> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <16 x i8>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <16 x i8> %[[v]] to <2 x i64>
+! LLVMIR: store <2 x i64> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_i1i8
 
 ! CHECK-LABEL: vec_convert_test_i1u1
@@ -485,17 +240,8 @@ subroutine vec_convert_test_i1u1(v, mold)
   vector(unsigned(1)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<16xi8> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: llvm.store %[[v]], %{{.*}} : !llvm.ptr<vector<16xi8>>
-
-! CHECK: %[[v:.*]] = load <16 x i8>, ptr %0, align 16
-! CHECK: store <16 x i8> %[[v]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <16 x i8>, ptr %0, align 16
+! LLVMIR: store <16 x i8> %[[v]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_i1u1
 
 ! CHECK-LABEL: vec_convert_test_i1u2
@@ -504,19 +250,9 @@ subroutine vec_convert_test_i1u2(v, mold)
   vector(unsigned(2)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<16xi8> to vector<8xi16>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<8xi16>) -> !fir.vector<8:ui16>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<16xi8> to vector<8xi16>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<8xi16>>
-
-! CHECK: %[[v:.*]] = load <16 x i8>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <16 x i8> %[[v]] to <8 x i16>
-! CHECK: store <8 x i16> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <16 x i8>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <16 x i8> %[[v]] to <8 x i16>
+! LLVMIR: store <8 x i16> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_i1u2
 
 ! CHECK-LABEL: vec_convert_test_i1u4
@@ -525,19 +261,9 @@ subroutine vec_convert_test_i1u4(v, mold)
   vector(unsigned(4)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<4xi32>) -> !fir.vector<4:ui32>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<16xi8> to vector<4xi32>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<4xi32>>
-
-! CHECK: %[[v:.*]] = load <16 x i8>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <16 x i8> %[[v]] to <4 x i32>
-! CHECK: store <4 x i32> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <16 x i8>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <16 x i8> %[[v]] to <4 x i32>
+! LLVMIR: store <4 x i32> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_i1u4
 
 ! CHECK-LABEL: vec_convert_test_i1u8
@@ -546,19 +272,9 @@ subroutine vec_convert_test_i1u8(v, mold)
   vector(unsigned(8)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<16xi8> to vector<2xi64>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<2xi64>) -> !fir.vector<2:ui64>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<16xi8> to vector<2xi64>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<2xi64>>
-
-! CHECK: %[[v:.*]] = load <16 x i8>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <16 x i8> %[[v]] to <2 x i64>
-! CHECK: store <2 x i64> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <16 x i8>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <16 x i8> %[[v]] to <2 x i64>
+! LLVMIR: store <2 x i64> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_i1u8
 
 ! CHECK-LABEL: vec_convert_test_i1r4
@@ -567,19 +283,9 @@ subroutine vec_convert_test_i1r4(v, mold)
   vector(real(4)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<16xi8> to vector<4xf32>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<16xi8> to vector<4xf32>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<4xf32>>
-
-! CHECK: %[[v:.*]] = load <16 x i8>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <16 x i8> %[[v]] to <4 x float>
-! CHECK: store <4 x float> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <16 x i8>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <16 x i8> %[[v]] to <4 x float>
+! LLVMIR: store <4 x float> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_i1r4
 
 ! CHECK-LABEL: vec_convert_test_i1r8
@@ -588,19 +294,9 @@ subroutine vec_convert_test_i1r8(v, mold)
   vector(real(8)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<16xi8> to vector<2xf64>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<2xf64>) -> !fir.vector<2:f64>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<16xi8> to vector<2xf64>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<2xf64>>
-
-! CHECK: %[[v:.*]] = load <16 x i8>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <16 x i8> %[[v]] to <2 x double>
-! CHECK: store <2 x double> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <16 x i8>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <16 x i8> %[[v]] to <2 x double>
+! LLVMIR: store <2 x double> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_i1r8
 
 ! CHECK-LABEL: vec_convert_test_i2i1
@@ -609,19 +305,9 @@ subroutine vec_convert_test_i2i1(v, mold)
   vector(integer(1)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<8xi16> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<16xi8>) -> !fir.vector<16:i8>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<8xi16> to vector<16xi8>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<16xi8>>
-
-! CHECK: %[[v:.*]] = load <8 x i16>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <8 x i16> %[[v]] to <16 x i8>
-! CHECK: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <8 x i16>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <8 x i16> %[[v]] to <16 x i8>
+! LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_i2i1
 
 ! CHECK-LABEL: vec_convert_test_i2i2
@@ -630,17 +316,8 @@ subroutine vec_convert_test_i2i2(v, mold)
   vector(integer(2)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<8xi16> to vector<8xi16>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<8xi16>) -> !fir.vector<8:i16>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: llvm.store %[[v]], %{{.*}} : !llvm.ptr<vector<8xi16>>
-
-! CHECK: %[[v:.*]] = load <8 x i16>, ptr %0, align 16
-! CHECK: store <8 x i16> %[[v]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <8 x i16>, ptr %0, align 16
+! LLVMIR: store <8 x i16> %[[v]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_i2i2
 
 ! CHECK-LABEL: vec_convert_test_i2i4
@@ -649,19 +326,9 @@ subroutine vec_convert_test_i2i4(v, mold)
   vector(integer(4)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<8xi16> to vector<4xi32>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<8xi16> to vector<4xi32>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<4xi32>>
-
-! CHECK: %[[v:.*]] = load <8 x i16>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <8 x i16> %[[v]] to <4 x i32>
-! CHECK: store <4 x i32> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <8 x i16>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <8 x i16> %[[v]] to <4 x i32>
+! LLVMIR: store <4 x i32> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_i2i4
 
 ! CHECK-LABEL: vec_convert_test_i2i8
@@ -670,19 +337,9 @@ subroutine vec_convert_test_i2i8(v, mold)
   vector(integer(8)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<8xi16> to vector<2xi64>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<2xi64>) -> !fir.vector<2:i64>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<8xi16> to vector<2xi64>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<2xi64>>
-
-! CHECK: %[[v:.*]] = load <8 x i16>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <8 x i16> %[[v]] to <2 x i64>
-! CHECK: store <2 x i64> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <8 x i16>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <8 x i16> %[[v]] to <2 x i64>
+! LLVMIR: store <2 x i64> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_i2i8
 
 ! CHECK-LABEL: vec_convert_test_i2u1
@@ -691,19 +348,9 @@ subroutine vec_convert_test_i2u1(v, mold)
   vector(unsigned(1)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<8xi16> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<8xi16> to vector<16xi8>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<16xi8>>
-
-! CHECK: %[[v:.*]] = load <8 x i16>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <8 x i16> %[[v]] to <16 x i8>
-! CHECK: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <8 x i16>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <8 x i16> %[[v]] to <16 x i8>
+! LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_i2u1
 
 ! CHECK-LABEL: vec_convert_test_i2u2
@@ -712,17 +359,8 @@ subroutine vec_convert_test_i2u2(v, mold)
   vector(unsigned(2)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<8xi16> to vector<8xi16>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<8xi16>) -> !fir.vector<8:ui16>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: llvm.store %[[v]], %{{.*}} : !llvm.ptr<vector<8xi16>>
-
-! CHECK: %[[v:.*]] = load <8 x i16>, ptr %0, align 16
-! CHECK: store <8 x i16> %[[v]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <8 x i16>, ptr %0, align 16
+! LLVMIR: store <8 x i16> %[[v]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_i2u2
 
 ! CHECK-LABEL: vec_convert_test_i2u4
@@ -731,19 +369,9 @@ subroutine vec_convert_test_i2u4(v, mold)
   vector(unsigned(4)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<8xi16> to vector<4xi32>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<4xi32>) -> !fir.vector<4:ui32>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<8xi16> to vector<4xi32>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<4xi32>>
-
-! CHECK: %[[v:.*]] = load <8 x i16>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <8 x i16> %[[v]] to <4 x i32>
-! CHECK: store <4 x i32> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <8 x i16>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <8 x i16> %[[v]] to <4 x i32>
+! LLVMIR: store <4 x i32> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_i2u4
 
 ! CHECK-LABEL: vec_convert_test_i2u8
@@ -752,19 +380,9 @@ subroutine vec_convert_test_i2u8(v, mold)
   vector(unsigned(8)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<8xi16> to vector<2xi64>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<2xi64>) -> !fir.vector<2:ui64>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<8xi16> to vector<2xi64>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<2xi64>>
-
-! CHECK: %[[v:.*]] = load <8 x i16>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <8 x i16> %[[v]] to <2 x i64>
-! CHECK: store <2 x i64> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <8 x i16>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <8 x i16> %[[v]] to <2 x i64>
+! LLVMIR: store <2 x i64> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_i2u8
 
 ! CHECK-LABEL: vec_convert_test_i2r4
@@ -773,19 +391,9 @@ subroutine vec_convert_test_i2r4(v, mold)
   vector(real(4)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<8xi16> to vector<4xf32>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<8xi16> to vector<4xf32>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<4xf32>>
-
-! CHECK: %[[v:.*]] = load <8 x i16>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <8 x i16> %[[v]] to <4 x float>
-! CHECK: store <4 x float> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <8 x i16>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <8 x i16> %[[v]] to <4 x float>
+! LLVMIR: store <4 x float> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_i2r4
 
 ! CHECK-LABEL: vec_convert_test_i2r8
@@ -794,19 +402,9 @@ subroutine vec_convert_test_i2r8(v, mold)
   vector(real(8)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<8xi16> to vector<2xf64>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<2xf64>) -> !fir.vector<2:f64>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<8xi16> to vector<2xf64>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<2xf64>>
-
-! CHECK: %[[v:.*]] = load <8 x i16>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <8 x i16> %[[v]] to <2 x double>
-! CHECK: store <2 x double> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <8 x i16>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <8 x i16> %[[v]] to <2 x double>
+! LLVMIR: store <2 x double> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_i2r8
 
 ! CHECK-LABEL: vec_convert_test_i4i1
@@ -815,19 +413,9 @@ subroutine vec_convert_test_i4i1(v, mold)
   vector(integer(1)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<16xi8>) -> !fir.vector<16:i8>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<4xi32> to vector<16xi8>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<16xi8>>
-
-! CHECK: %[[v:.*]] = load <4 x i32>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <4 x i32> %[[v]] to <16 x i8>
-! CHECK: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <4 x i32>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <4 x i32> %[[v]] to <16 x i8>
+! LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_i4i1
 
 ! CHECK-LABEL: vec_convert_test_i4i2
@@ -836,19 +424,9 @@ subroutine vec_convert_test_i4i2(v, mold)
   vector(integer(2)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<4xi32> to vector<8xi16>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<8xi16>) -> !fir.vector<8:i16>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<4xi32> to vector<8xi16>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<8xi16>>
-
-! CHECK: %[[v:.*]] = load <4 x i32>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <4 x i32> %[[v]] to <8 x i16>
-! CHECK: store <8 x i16> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <4 x i32>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <4 x i32> %[[v]] to <8 x i16>
+! LLVMIR: store <8 x i16> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_i4i2
 
 ! CHECK-LABEL: vec_convert_test_i4i4
@@ -857,17 +435,8 @@ subroutine vec_convert_test_i4i4(v, mold)
   vector(integer(4)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<4xi32> to vector<4xi32>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: llvm.store %[[v]], %{{.*}} : !llvm.ptr<vector<4xi32>>
-
-! CHECK: %[[v:.*]] = load <4 x i32>, ptr %0, align 16
-! CHECK: store <4 x i32> %[[v]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <4 x i32>, ptr %0, align 16
+! LLVMIR: store <4 x i32> %[[v]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_i4i4
 
 ! CHECK-LABEL: vec_convert_test_i4i8
@@ -876,19 +445,9 @@ subroutine vec_convert_test_i4i8(v, mold)
   vector(integer(8)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<4xi32> to vector<2xi64>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<2xi64>) -> !fir.vector<2:i64>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<4xi32> to vector<2xi64>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<2xi64>>
-
-! CHECK: %[[v:.*]] = load <4 x i32>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <4 x i32> %[[v]] to <2 x i64>
-! CHECK: store <2 x i64> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <4 x i32>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <4 x i32> %[[v]] to <2 x i64>
+! LLVMIR: store <2 x i64> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_i4i8
 
 ! CHECK-LABEL: vec_convert_test_i4u1
@@ -897,19 +456,9 @@ subroutine vec_convert_test_i4u1(v, mold)
   vector(unsigned(1)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<4xi32> to vector<16xi8>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<16xi8>>
-
-! CHECK: %[[v:.*]] = load <4 x i32>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <4 x i32> %[[v]] to <16 x i8>
-! CHECK: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <4 x i32>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <4 x i32> %[[v]] to <16 x i8>
+! LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_i4u1
 
 ! CHECK-LABEL: vec_convert_test_i4u2
@@ -918,19 +467,9 @@ subroutine vec_convert_test_i4u2(v, mold)
   vector(unsigned(2)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<4xi32> to vector<8xi16>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<8xi16>) -> !fir.vector<8:ui16>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<4xi32> to vector<8xi16>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<8xi16>>
-
-! CHECK: %[[v:.*]] = load <4 x i32>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <4 x i32> %[[v]] to <8 x i16>
-! CHECK: store <8 x i16> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <4 x i32>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <4 x i32> %[[v]] to <8 x i16>
+! LLVMIR: store <8 x i16> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_i4u2
 
 ! CHECK-LABEL: vec_convert_test_i4u4
@@ -939,17 +478,8 @@ subroutine vec_convert_test_i4u4(v, mold)
   vector(unsigned(4)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<4xi32> to vector<4xi32>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<4xi32>) -> !fir.vector<4:ui32>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: llvm.store %[[v]], %{{.*}} : !llvm.ptr<vector<4xi32>>
-
-! CHECK: %[[v:.*]] = load <4 x i32>, ptr %0, align 16
-! CHECK: store <4 x i32> %[[v]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <4 x i32>, ptr %0, align 16
+! LLVMIR: store <4 x i32> %[[v]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_i4u4
 
 ! CHECK-LABEL: vec_convert_test_i4u8
@@ -958,19 +488,9 @@ subroutine vec_convert_test_i4u8(v, mold)
   vector(unsigned(8)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<4xi32> to vector<2xi64>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<2xi64>) -> !fir.vector<2:ui64>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<4xi32> to vector<2xi64>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<2xi64>>
-
-! CHECK: %[[v:.*]] = load <4 x i32>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <4 x i32> %[[v]] to <2 x i64>
-! CHECK: store <2 x i64> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <4 x i32>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <4 x i32> %[[v]] to <2 x i64>
+! LLVMIR: store <2 x i64> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_i4u8
 
 ! CHECK-LABEL: vec_convert_test_i4r4
@@ -979,19 +499,9 @@ subroutine vec_convert_test_i4r4(v, mold)
   vector(real(4)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<4xi32> to vector<4xf32>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<4xi32> to vector<4xf32>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<4xf32>>
-
-! CHECK: %[[v:.*]] = load <4 x i32>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <4 x i32> %[[v]] to <4 x float>
-! CHECK: store <4 x float> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <4 x i32>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <4 x i32> %[[v]] to <4 x float>
+! LLVMIR: store <4 x float> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_i4r4
 
 ! CHECK-LABEL: vec_convert_test_i4r8
@@ -1000,19 +510,9 @@ subroutine vec_convert_test_i4r8(v, mold)
   vector(real(8)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<4xi32> to vector<2xf64>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<2xf64>) -> !fir.vector<2:f64>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<4xi32> to vector<2xf64>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<2xf64>>
-
-! CHECK: %[[v:.*]] = load <4 x i32>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <4 x i32> %[[v]] to <2 x double>
-! CHECK: store <2 x double> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <4 x i32>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <4 x i32> %[[v]] to <2 x double>
+! LLVMIR: store <2 x double> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_i4r8
 
 ! CHECK-LABEL: vec_convert_test_i8i1
@@ -1021,19 +521,9 @@ subroutine vec_convert_test_i8i1(v, mold)
   vector(integer(1)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<2xi64> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<16xi8>) -> !fir.vector<16:i8>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<2xi64> to vector<16xi8>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<16xi8>>
-
-! CHECK: %[[v:.*]] = load <2 x i64>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <2 x i64> %[[v]] to <16 x i8>
-! CHECK: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <2 x i64>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <2 x i64> %[[v]] to <16 x i8>
+! LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_i8i1
 
 ! CHECK-LABEL: vec_convert_test_i8i2
@@ -1042,19 +532,9 @@ subroutine vec_convert_test_i8i2(v, mold)
   vector(integer(2)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<2xi64> to vector<8xi16>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<8xi16>) -> !fir.vector<8:i16>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<2xi64> to vector<8xi16>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<8xi16>>
-
-! CHECK: %[[v:.*]] = load <2 x i64>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <2 x i64> %[[v]] to <8 x i16>
-! CHECK: store <8 x i16> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <2 x i64>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <2 x i64> %[[v]] to <8 x i16>
+! LLVMIR: store <8 x i16> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_i8i2
 
 ! CHECK-LABEL: vec_convert_test_i8i4
@@ -1063,19 +543,9 @@ subroutine vec_convert_test_i8i4(v, mold)
   vector(integer(4)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<2xi64> to vector<4xi32>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<2xi64> to vector<4xi32>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<4xi32>>
-
-! CHECK: %[[v:.*]] = load <2 x i64>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <2 x i64> %[[v]] to <4 x i32>
-! CHECK: store <4 x i32> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <2 x i64>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <2 x i64> %[[v]] to <4 x i32>
+! LLVMIR: store <4 x i32> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_i8i4
 
 ! CHECK-LABEL: vec_convert_test_i8i8
@@ -1084,17 +554,8 @@ subroutine vec_convert_test_i8i8(v, mold)
   vector(integer(8)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<2xi64> to vector<2xi64>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<2xi64>) -> !fir.vector<2:i64>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: llvm.store %[[v]], %{{.*}} : !llvm.ptr<vector<2xi64>>
-
-! CHECK: %[[v:.*]] = load <2 x i64>, ptr %0, align 16
-! CHECK: store <2 x i64> %[[v]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <2 x i64>, ptr %0, align 16
+! LLVMIR: store <2 x i64> %[[v]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_i8i8
 
 ! CHECK-LABEL: vec_convert_test_i8u1
@@ -1103,19 +564,9 @@ subroutine vec_convert_test_i8u1(v, mold)
   vector(unsigned(1)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<2xi64> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<2xi64> to vector<16xi8>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<16xi8>>
-
-! CHECK: %[[v:.*]] = load <2 x i64>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <2 x i64> %[[v]] to <16 x i8>
-! CHECK: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <2 x i64>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <2 x i64> %[[v]] to <16 x i8>
+! LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_i8u1
 
 ! CHECK-LABEL: vec_convert_test_i8u2
@@ -1124,19 +575,9 @@ subroutine vec_convert_test_i8u2(v, mold)
   vector(unsigned(2)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<2xi64> to vector<8xi16>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<8xi16>) -> !fir.vector<8:ui16>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<2xi64> to vector<8xi16>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<8xi16>>
-
-! CHECK: %[[v:.*]] = load <2 x i64>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <2 x i64> %[[v]] to <8 x i16>
-! CHECK: store <8 x i16> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <2 x i64>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <2 x i64> %[[v]] to <8 x i16>
+! LLVMIR: store <8 x i16> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_i8u2
 
 ! CHECK-LABEL: vec_convert_test_i8u4
@@ -1145,19 +586,9 @@ subroutine vec_convert_test_i8u4(v, mold)
   vector(unsigned(4)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<2xi64> to vector<4xi32>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<4xi32>) -> !fir.vector<4:ui32>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<2xi64> to vector<4xi32>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<4xi32>>
-
-! CHECK: %[[v:.*]] = load <2 x i64>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <2 x i64> %[[v]] to <4 x i32>
-! CHECK: store <4 x i32> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <2 x i64>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <2 x i64> %[[v]] to <4 x i32>
+! LLVMIR: store <4 x i32> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_i8u4
 
 ! CHECK-LABEL: vec_convert_test_i8u8
@@ -1166,17 +597,8 @@ subroutine vec_convert_test_i8u8(v, mold)
   vector(unsigned(8)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<2xi64> to vector<2xi64>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<2xi64>) -> !fir.vector<2:ui64>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: llvm.store %[[v]], %{{.*}} : !llvm.ptr<vector<2xi64>>
-
-! CHECK: %[[v:.*]] = load <2 x i64>, ptr %0, align 16
-! CHECK: store <2 x i64> %[[v]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <2 x i64>, ptr %0, align 16
+! LLVMIR: store <2 x i64> %[[v]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_i8u8
 
 ! CHECK-LABEL: vec_convert_test_i8r4
@@ -1185,19 +607,9 @@ subroutine vec_convert_test_i8r4(v, mold)
   vector(real(4)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<2xi64> to vector<4xf32>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<2xi64> to vector<4xf32>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<4xf32>>
-
-! CHECK: %[[v:.*]] = load <2 x i64>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <2 x i64> %[[v]] to <4 x float>
-! CHECK: store <4 x float> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <2 x i64>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <2 x i64> %[[v]] to <4 x float>
+! LLVMIR: store <4 x float> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_i8r4
 
 ! CHECK-LABEL: vec_convert_test_i8r8
@@ -1206,19 +618,9 @@ subroutine vec_convert_test_i8r8(v, mold)
   vector(real(8)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<2xi64> to vector<2xf64>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<2xf64>) -> !fir.vector<2:f64>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<2xi64> to vector<2xf64>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<2xf64>>
-
-! CHECK: %[[v:.*]] = load <2 x i64>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <2 x i64> %[[v]] to <2 x double>
-! CHECK: store <2 x double> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <2 x i64>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <2 x i64> %[[v]] to <2 x double>
+! LLVMIR: store <2 x double> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_i8r8
 
 ! CHECK-LABEL: vec_convert_test_u1i1
@@ -1227,17 +629,8 @@ subroutine vec_convert_test_u1i1(v, mold)
   vector(integer(1)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<16xi8> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<16xi8>) -> !fir.vector<16:i8>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: llvm.store %[[v]], %{{.*}} : !llvm.ptr<vector<16xi8>>
-
-! CHECK: %[[v:.*]] = load <16 x i8>, ptr %0, align 16
-! CHECK: store <16 x i8> %[[v]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <16 x i8>, ptr %0, align 16
+! LLVMIR: store <16 x i8> %[[v]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_u1i1
 
 ! CHECK-LABEL: vec_convert_test_u1i2
@@ -1246,19 +639,9 @@ subroutine vec_convert_test_u1i2(v, mold)
   vector(integer(2)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<16xi8> to vector<8xi16>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<8xi16>) -> !fir.vector<8:i16>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<16xi8> to vector<8xi16>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<8xi16>>
-
-! CHECK: %[[v:.*]] = load <16 x i8>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <16 x i8> %[[v]] to <8 x i16>
-! CHECK: store <8 x i16> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <16 x i8>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <16 x i8> %[[v]] to <8 x i16>
+! LLVMIR: store <8 x i16> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_u1i2
 
 ! CHECK-LABEL: vec_convert_test_u1i4
@@ -1267,19 +650,9 @@ subroutine vec_convert_test_u1i4(v, mold)
   vector(integer(4)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<16xi8> to vector<4xi32>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<4xi32>>
-
-! CHECK: %[[v:.*]] = load <16 x i8>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <16 x i8> %[[v]] to <4 x i32>
-! CHECK: store <4 x i32> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <16 x i8>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <16 x i8> %[[v]] to <4 x i32>
+! LLVMIR: store <4 x i32> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_u1i4
 
 ! CHECK-LABEL: vec_convert_test_u1i8
@@ -1288,19 +661,9 @@ subroutine vec_convert_test_u1i8(v, mold)
   vector(integer(8)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<16xi8> to vector<2xi64>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<2xi64>) -> !fir.vector<2:i64>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<16xi8> to vector<2xi64>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<2xi64>>
-
-! CHECK: %[[v:.*]] = load <16 x i8>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <16 x i8> %[[v]] to <2 x i64>
-! CHECK: store <2 x i64> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <16 x i8>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <16 x i8> %[[v]] to <2 x i64>
+! LLVMIR: store <2 x i64> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_u1i8
 
 ! CHECK-LABEL: vec_convert_test_u1u1
@@ -1309,17 +672,8 @@ subroutine vec_convert_test_u1u1(v, mold)
   vector(unsigned(1)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<16xi8> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: llvm.store %[[v]], %{{.*}} : !llvm.ptr<vector<16xi8>>
-
-! CHECK: %[[v:.*]] = load <16 x i8>, ptr %0, align 16
-! CHECK: store <16 x i8> %[[v]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <16 x i8>, ptr %0, align 16
+! LLVMIR: store <16 x i8> %[[v]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_u1u1
 
 ! CHECK-LABEL: vec_convert_test_u1u2
@@ -1328,19 +682,9 @@ subroutine vec_convert_test_u1u2(v, mold)
   vector(unsigned(2)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<16xi8> to vector<8xi16>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<8xi16>) -> !fir.vector<8:ui16>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<16xi8> to vector<8xi16>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<8xi16>>
-
-! CHECK: %[[v:.*]] = load <16 x i8>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <16 x i8> %[[v]] to <8 x i16>
-! CHECK: store <8 x i16> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <16 x i8>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <16 x i8> %[[v]] to <8 x i16>
+! LLVMIR: store <8 x i16> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_u1u2
 
 ! CHECK-LABEL: vec_convert_test_u1u4
@@ -1349,19 +693,9 @@ subroutine vec_convert_test_u1u4(v, mold)
   vector(unsigned(4)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<4xi32>) -> !fir.vector<4:ui32>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<16xi8> to vector<4xi32>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<4xi32>>
-
-! CHECK: %[[v:.*]] = load <16 x i8>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <16 x i8> %[[v]] to <4 x i32>
-! CHECK: store <4 x i32> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <16 x i8>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <16 x i8> %[[v]] to <4 x i32>
+! LLVMIR: store <4 x i32> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_u1u4
 
 ! CHECK-LABEL: vec_convert_test_u1u8
@@ -1370,19 +704,9 @@ subroutine vec_convert_test_u1u8(v, mold)
   vector(unsigned(8)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<16xi8> to vector<2xi64>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<2xi64>) -> !fir.vector<2:ui64>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<16xi8> to vector<2xi64>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<2xi64>>
-
-! CHECK: %[[v:.*]] = load <16 x i8>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <16 x i8> %[[v]] to <2 x i64>
-! CHECK: store <2 x i64> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <16 x i8>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <16 x i8> %[[v]] to <2 x i64>
+! LLVMIR: store <2 x i64> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_u1u8
 
 ! CHECK-LABEL: vec_convert_test_u1r4
@@ -1391,19 +715,9 @@ subroutine vec_convert_test_u1r4(v, mold)
   vector(real(4)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<16xi8> to vector<4xf32>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<16xi8> to vector<4xf32>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<4xf32>>
-
-! CHECK: %[[v:.*]] = load <16 x i8>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <16 x i8> %[[v]] to <4 x float>
-! CHECK: store <4 x float> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <16 x i8>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <16 x i8> %[[v]] to <4 x float>
+! LLVMIR: store <4 x float> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_u1r4
 
 ! CHECK-LABEL: vec_convert_test_u1r8
@@ -1412,19 +726,9 @@ subroutine vec_convert_test_u1r8(v, mold)
   vector(real(8)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<16xi8> to vector<2xf64>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<2xf64>) -> !fir.vector<2:f64>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<16xi8> to vector<2xf64>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<2xf64>>
-
-! CHECK: %[[v:.*]] = load <16 x i8>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <16 x i8> %[[v]] to <2 x double>
-! CHECK: store <2 x double> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <16 x i8>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <16 x i8> %[[v]] to <2 x double>
+! LLVMIR: store <2 x double> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_u1r8
 
 ! CHECK-LABEL: vec_convert_test_u2i1
@@ -1433,19 +737,9 @@ subroutine vec_convert_test_u2i1(v, mold)
   vector(integer(1)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<8xi16> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<16xi8>) -> !fir.vector<16:i8>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<8xi16> to vector<16xi8>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<16xi8>>
-
-! CHECK: %[[v:.*]] = load <8 x i16>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <8 x i16> %[[v]] to <16 x i8>
-! CHECK: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <8 x i16>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <8 x i16> %[[v]] to <16 x i8>
+! LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_u2i1
 
 ! CHECK-LABEL: vec_convert_test_u2i2
@@ -1454,17 +748,8 @@ subroutine vec_convert_test_u2i2(v, mold)
   vector(integer(2)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<8xi16> to vector<8xi16>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<8xi16>) -> !fir.vector<8:i16>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: llvm.store %[[v]], %{{.*}} : !llvm.ptr<vector<8xi16>>
-
-! CHECK: %[[v:.*]] = load <8 x i16>, ptr %0, align 16
-! CHECK: store <8 x i16> %[[v]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <8 x i16>, ptr %0, align 16
+! LLVMIR: store <8 x i16> %[[v]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_u2i2
 
 ! CHECK-LABEL: vec_convert_test_u2i4
@@ -1473,19 +758,9 @@ subroutine vec_convert_test_u2i4(v, mold)
   vector(integer(4)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<8xi16> to vector<4xi32>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<8xi16> to vector<4xi32>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<4xi32>>
-
-! CHECK: %[[v:.*]] = load <8 x i16>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <8 x i16> %[[v]] to <4 x i32>
-! CHECK: store <4 x i32> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <8 x i16>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <8 x i16> %[[v]] to <4 x i32>
+! LLVMIR: store <4 x i32> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_u2i4
 
 ! CHECK-LABEL: vec_convert_test_u2i8
@@ -1494,19 +769,9 @@ subroutine vec_convert_test_u2i8(v, mold)
   vector(integer(8)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<8xi16> to vector<2xi64>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<2xi64>) -> !fir.vector<2:i64>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<8xi16> to vector<2xi64>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<2xi64>>
-
-! CHECK: %[[v:.*]] = load <8 x i16>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <8 x i16> %[[v]] to <2 x i64>
-! CHECK: store <2 x i64> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <8 x i16>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <8 x i16> %[[v]] to <2 x i64>
+! LLVMIR: store <2 x i64> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_u2i8
 
 ! CHECK-LABEL: vec_convert_test_u2u1
@@ -1515,19 +780,9 @@ subroutine vec_convert_test_u2u1(v, mold)
   vector(unsigned(1)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<8xi16> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<8xi16> to vector<16xi8>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<16xi8>>
-
-! CHECK: %[[v:.*]] = load <8 x i16>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <8 x i16> %[[v]] to <16 x i8>
-! CHECK: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <8 x i16>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <8 x i16> %[[v]] to <16 x i8>
+! LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_u2u1
 
 ! CHECK-LABEL: vec_convert_test_u2u2
@@ -1536,17 +791,8 @@ subroutine vec_convert_test_u2u2(v, mold)
   vector(unsigned(2)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<8xi16> to vector<8xi16>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<8xi16>) -> !fir.vector<8:ui16>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: llvm.store %[[v]], %{{.*}} : !llvm.ptr<vector<8xi16>>
-
-! CHECK: %[[v:.*]] = load <8 x i16>, ptr %0, align 16
-! CHECK: store <8 x i16> %[[v]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <8 x i16>, ptr %0, align 16
+! LLVMIR: store <8 x i16> %[[v]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_u2u2
 
 ! CHECK-LABEL: vec_convert_test_u2u4
@@ -1555,19 +801,9 @@ subroutine vec_convert_test_u2u4(v, mold)
   vector(unsigned(4)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<8xi16> to vector<4xi32>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<4xi32>) -> !fir.vector<4:ui32>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<8xi16> to vector<4xi32>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<4xi32>>
-
-! CHECK: %[[v:.*]] = load <8 x i16>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <8 x i16> %[[v]] to <4 x i32>
-! CHECK: store <4 x i32> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <8 x i16>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <8 x i16> %[[v]] to <4 x i32>
+! LLVMIR: store <4 x i32> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_u2u4
 
 ! CHECK-LABEL: vec_convert_test_u2u8
@@ -1576,19 +812,9 @@ subroutine vec_convert_test_u2u8(v, mold)
   vector(unsigned(8)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<8xi16> to vector<2xi64>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<2xi64>) -> !fir.vector<2:ui64>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<8xi16> to vector<2xi64>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<2xi64>>
-
-! CHECK: %[[v:.*]] = load <8 x i16>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <8 x i16> %[[v]] to <2 x i64>
-! CHECK: store <2 x i64> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <8 x i16>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <8 x i16> %[[v]] to <2 x i64>
+! LLVMIR: store <2 x i64> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_u2u8
 
 ! CHECK-LABEL: vec_convert_test_u2r4
@@ -1597,19 +823,9 @@ subroutine vec_convert_test_u2r4(v, mold)
   vector(real(4)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<8xi16> to vector<4xf32>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<8xi16> to vector<4xf32>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<4xf32>>
-
-! CHECK: %[[v:.*]] = load <8 x i16>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <8 x i16> %[[v]] to <4 x float>
-! CHECK: store <4 x float> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <8 x i16>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <8 x i16> %[[v]] to <4 x float>
+! LLVMIR: store <4 x float> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_u2r4
 
 ! CHECK-LABEL: vec_convert_test_u2r8
@@ -1618,19 +834,9 @@ subroutine vec_convert_test_u2r8(v, mold)
   vector(real(8)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<8xi16> to vector<2xf64>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<2xf64>) -> !fir.vector<2:f64>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<8xi16> to vector<2xf64>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<2xf64>>
-
-! CHECK: %[[v:.*]] = load <8 x i16>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <8 x i16> %[[v]] to <2 x double>
-! CHECK: store <2 x double> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <8 x i16>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <8 x i16> %[[v]] to <2 x double>
+! LLVMIR: store <2 x double> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_u2r8
 
 ! CHECK-LABEL: vec_convert_test_u4i1
@@ -1639,19 +845,9 @@ subroutine vec_convert_test_u4i1(v, mold)
   vector(integer(1)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<16xi8>) -> !fir.vector<16:i8>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<4xi32> to vector<16xi8>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<16xi8>>
-
-! CHECK: %[[v:.*]] = load <4 x i32>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <4 x i32> %[[v]] to <16 x i8>
-! CHECK: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <4 x i32>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <4 x i32> %[[v]] to <16 x i8>
+! LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_u4i1
 
 ! CHECK-LABEL: vec_convert_test_u4i2
@@ -1660,19 +856,9 @@ subroutine vec_convert_test_u4i2(v, mold)
   vector(integer(2)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<4xi32> to vector<8xi16>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<8xi16>) -> !fir.vector<8:i16>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<4xi32> to vector<8xi16>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<8xi16>>
-
-! CHECK: %[[v:.*]] = load <4 x i32>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <4 x i32> %[[v]] to <8 x i16>
-! CHECK: store <8 x i16> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <4 x i32>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <4 x i32> %[[v]] to <8 x i16>
+! LLVMIR: store <8 x i16> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_u4i2
 
 ! CHECK-LABEL: vec_convert_test_u4i4
@@ -1681,17 +867,8 @@ subroutine vec_convert_test_u4i4(v, mold)
   vector(integer(4)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<4xi32> to vector<4xi32>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: llvm.store %[[v]], %{{.*}} : !llvm.ptr<vector<4xi32>>
-
-! CHECK: %[[v:.*]] = load <4 x i32>, ptr %0, align 16
-! CHECK: store <4 x i32> %[[v]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <4 x i32>, ptr %0, align 16
+! LLVMIR: store <4 x i32> %[[v]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_u4i4
 
 ! CHECK-LABEL: vec_convert_test_u4i8
@@ -1700,19 +877,9 @@ subroutine vec_convert_test_u4i8(v, mold)
   vector(integer(8)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<4xi32> to vector<2xi64>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<2xi64>) -> !fir.vector<2:i64>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<4xi32> to vector<2xi64>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<2xi64>>
-
-! CHECK: %[[v:.*]] = load <4 x i32>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <4 x i32> %[[v]] to <2 x i64>
-! CHECK: store <2 x i64> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <4 x i32>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <4 x i32> %[[v]] to <2 x i64>
+! LLVMIR: store <2 x i64> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_u4i8
 
 ! CHECK-LABEL: vec_convert_test_u4u1
@@ -1721,19 +888,9 @@ subroutine vec_convert_test_u4u1(v, mold)
   vector(unsigned(1)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<4xi32> to vector<16xi8>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<16xi8>>
-
-! CHECK: %[[v:.*]] = load <4 x i32>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <4 x i32> %[[v]] to <16 x i8>
-! CHECK: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <4 x i32>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <4 x i32> %[[v]] to <16 x i8>
+! LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_u4u1
 
 ! CHECK-LABEL: vec_convert_test_u4u2
@@ -1742,19 +899,9 @@ subroutine vec_convert_test_u4u2(v, mold)
   vector(unsigned(2)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<4xi32> to vector<8xi16>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<8xi16>) -> !fir.vector<8:ui16>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<4xi32> to vector<8xi16>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<8xi16>>
-
-! CHECK: %[[v:.*]] = load <4 x i32>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <4 x i32> %[[v]] to <8 x i16>
-! CHECK: store <8 x i16> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <4 x i32>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <4 x i32> %[[v]] to <8 x i16>
+! LLVMIR: store <8 x i16> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_u4u2
 
 ! CHECK-LABEL: vec_convert_test_u4u4
@@ -1763,17 +910,8 @@ subroutine vec_convert_test_u4u4(v, mold)
   vector(unsigned(4)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<4xi32> to vector<4xi32>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<4xi32>) -> !fir.vector<4:ui32>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: llvm.store %[[v]], %{{.*}} : !llvm.ptr<vector<4xi32>>
-
-! CHECK: %[[v:.*]] = load <4 x i32>, ptr %0, align 16
-! CHECK: store <4 x i32> %[[v]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <4 x i32>, ptr %0, align 16
+! LLVMIR: store <4 x i32> %[[v]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_u4u4
 
 ! CHECK-LABEL: vec_convert_test_u4u8
@@ -1782,19 +920,9 @@ subroutine vec_convert_test_u4u8(v, mold)
   vector(unsigned(8)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<4xi32> to vector<2xi64>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<2xi64>) -> !fir.vector<2:ui64>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<4xi32> to vector<2xi64>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<2xi64>>
-
-! CHECK: %[[v:.*]] = load <4 x i32>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <4 x i32> %[[v]] to <2 x i64>
-! CHECK: store <2 x i64> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <4 x i32>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <4 x i32> %[[v]] to <2 x i64>
+! LLVMIR: store <2 x i64> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_u4u8
 
 ! CHECK-LABEL: vec_convert_test_u4r4
@@ -1803,19 +931,9 @@ subroutine vec_convert_test_u4r4(v, mold)
   vector(real(4)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<4xi32> to vector<4xf32>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<4xi32> to vector<4xf32>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<4xf32>>
-
-! CHECK: %[[v:.*]] = load <4 x i32>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <4 x i32> %[[v]] to <4 x float>
-! CHECK: store <4 x float> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <4 x i32>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <4 x i32> %[[v]] to <4 x float>
+! LLVMIR: store <4 x float> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_u4r4
 
 ! CHECK-LABEL: vec_convert_test_u4r8
@@ -1824,19 +942,9 @@ subroutine vec_convert_test_u4r8(v, mold)
   vector(real(8)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<4xi32> to vector<2xf64>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<2xf64>) -> !fir.vector<2:f64>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<4xi32> to vector<2xf64>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<2xf64>>
-
-! CHECK: %[[v:.*]] = load <4 x i32>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <4 x i32> %[[v]] to <2 x double>
-! CHECK: store <2 x double> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <4 x i32>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <4 x i32> %[[v]] to <2 x double>
+! LLVMIR: store <2 x double> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_u4r8
 
 ! CHECK-LABEL: vec_convert_test_u8i1
@@ -1845,19 +953,9 @@ subroutine vec_convert_test_u8i1(v, mold)
   vector(integer(1)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<2xi64> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<16xi8>) -> !fir.vector<16:i8>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<2xi64> to vector<16xi8>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<16xi8>>
-
-! CHECK: %[[v:.*]] = load <2 x i64>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <2 x i64> %[[v]] to <16 x i8>
-! CHECK: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <2 x i64>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <2 x i64> %[[v]] to <16 x i8>
+! LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_u8i1
 
 ! CHECK-LABEL: vec_convert_test_u8i2
@@ -1866,19 +964,9 @@ subroutine vec_convert_test_u8i2(v, mold)
   vector(integer(2)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<2xi64> to vector<8xi16>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<8xi16>) -> !fir.vector<8:i16>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<2xi64> to vector<8xi16>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<8xi16>>
-
-! CHECK: %[[v:.*]] = load <2 x i64>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <2 x i64> %[[v]] to <8 x i16>
-! CHECK: store <8 x i16> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <2 x i64>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <2 x i64> %[[v]] to <8 x i16>
+! LLVMIR: store <8 x i16> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_u8i2
 
 ! CHECK-LABEL: vec_convert_test_u8i4
@@ -1887,19 +975,9 @@ subroutine vec_convert_test_u8i4(v, mold)
   vector(integer(4)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<2xi64> to vector<4xi32>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<2xi64> to vector<4xi32>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<4xi32>>
-
-! CHECK: %[[v:.*]] = load <2 x i64>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <2 x i64> %[[v]] to <4 x i32>
-! CHECK: store <4 x i32> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <2 x i64>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <2 x i64> %[[v]] to <4 x i32>
+! LLVMIR: store <4 x i32> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_u8i4
 
 ! CHECK-LABEL: vec_convert_test_u8i8
@@ -1908,17 +986,8 @@ subroutine vec_convert_test_u8i8(v, mold)
   vector(integer(8)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<2xi64> to vector<2xi64>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<2xi64>) -> !fir.vector<2:i64>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: llvm.store %[[v]], %{{.*}} : !llvm.ptr<vector<2xi64>>
-
-! CHECK: %[[v:.*]] = load <2 x i64>, ptr %0, align 16
-! CHECK: store <2 x i64> %[[v]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <2 x i64>, ptr %0, align 16
+! LLVMIR: store <2 x i64> %[[v]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_u8i8
 
 ! CHECK-LABEL: vec_convert_test_u8u1
@@ -1927,19 +996,9 @@ subroutine vec_convert_test_u8u1(v, mold)
   vector(unsigned(1)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<2xi64> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<2xi64> to vector<16xi8>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<16xi8>>
-
-! CHECK: %[[v:.*]] = load <2 x i64>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <2 x i64> %[[v]] to <16 x i8>
-! CHECK: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <2 x i64>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <2 x i64> %[[v]] to <16 x i8>
+! LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_u8u1
 
 ! CHECK-LABEL: vec_convert_test_u8u2
@@ -1948,19 +1007,9 @@ subroutine vec_convert_test_u8u2(v, mold)
   vector(unsigned(2)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<2xi64> to vector<8xi16>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<8xi16>) -> !fir.vector<8:ui16>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<2xi64> to vector<8xi16>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<8xi16>>
-
-! CHECK: %[[v:.*]] = load <2 x i64>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <2 x i64> %[[v]] to <8 x i16>
-! CHECK: store <8 x i16> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <2 x i64>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <2 x i64> %[[v]] to <8 x i16>
+! LLVMIR: store <8 x i16> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_u8u2
 
 ! CHECK-LABEL: vec_convert_test_u8u4
@@ -1969,19 +1018,9 @@ subroutine vec_convert_test_u8u4(v, mold)
   vector(unsigned(4)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<2xi64> to vector<4xi32>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<4xi32>) -> !fir.vector<4:ui32>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<2xi64> to vector<4xi32>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<4xi32>>
-
-! CHECK: %[[v:.*]] = load <2 x i64>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <2 x i64> %[[v]] to <4 x i32>
-! CHECK: store <4 x i32> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <2 x i64>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <2 x i64> %[[v]] to <4 x i32>
+! LLVMIR: store <4 x i32> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_u8u4
 
 ! CHECK-LABEL: vec_convert_test_u8u8
@@ -1990,17 +1029,8 @@ subroutine vec_convert_test_u8u8(v, mold)
   vector(unsigned(8)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<2xi64> to vector<2xi64>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<2xi64>) -> !fir.vector<2:ui64>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: llvm.store %[[v]], %{{.*}} : !llvm.ptr<vector<2xi64>>
-
-! CHECK: %[[v:.*]] = load <2 x i64>, ptr %0, align 16
-! CHECK: store <2 x i64> %[[v]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <2 x i64>, ptr %0, align 16
+! LLVMIR: store <2 x i64> %[[v]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_u8u8
 
 ! CHECK-LABEL: vec_convert_test_u8r4
@@ -2009,19 +1039,9 @@ subroutine vec_convert_test_u8r4(v, mold)
   vector(real(4)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<2xi64> to vector<4xf32>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<2xi64> to vector<4xf32>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<4xf32>>
-
-! CHECK: %[[v:.*]] = load <2 x i64>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <2 x i64> %[[v]] to <4 x float>
-! CHECK: store <4 x float> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <2 x i64>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <2 x i64> %[[v]] to <4 x float>
+! LLVMIR: store <4 x float> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_u8r4
 
 ! CHECK-LABEL: vec_convert_test_u8r8
@@ -2030,19 +1050,9 @@ subroutine vec_convert_test_u8r8(v, mold)
   vector(real(8)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<2xi64> to vector<2xf64>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<2xf64>) -> !fir.vector<2:f64>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<2xi64> to vector<2xf64>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<2xf64>>
-
-! CHECK: %[[v:.*]] = load <2 x i64>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <2 x i64> %[[v]] to <2 x double>
-! CHECK: store <2 x double> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <2 x i64>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <2 x i64> %[[v]] to <2 x double>
+! LLVMIR: store <2 x double> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_u8r8
 
 ! CHECK-LABEL: vec_convert_test_r4i1
@@ -2051,19 +1061,9 @@ subroutine vec_convert_test_r4i1(v, mold)
   vector(integer(1)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<4xf32> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<16xi8>) -> !fir.vector<16:i8>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<4xf32> to vector<16xi8>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<16xi8>>
-
-! CHECK: %[[v:.*]] = load <4 x float>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <4 x float> %[[v]] to <16 x i8>
-! CHECK: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <4 x float>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <4 x float> %[[v]] to <16 x i8>
+! LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_r4i1
 
 ! CHECK-LABEL: vec_convert_test_r4i2
@@ -2072,19 +1072,9 @@ subroutine vec_convert_test_r4i2(v, mold)
   vector(integer(2)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<4xf32> to vector<8xi16>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<8xi16>) -> !fir.vector<8:i16>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<4xf32> to vector<8xi16>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<8xi16>>
-
-! CHECK: %[[v:.*]] = load <4 x float>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <4 x float> %[[v]] to <8 x i16>
-! CHECK: store <8 x i16> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <4 x float>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <4 x float> %[[v]] to <8 x i16>
+! LLVMIR: store <8 x i16> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_r4i2
 
 ! CHECK-LABEL: vec_convert_test_r4i4
@@ -2093,19 +1083,9 @@ subroutine vec_convert_test_r4i4(v, mold)
   vector(integer(4)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<4xf32> to vector<4xi32>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<4xf32> to vector<4xi32>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<4xi32>>
-
-! CHECK: %[[v:.*]] = load <4 x float>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <4 x float> %[[v]] to <4 x i32>
-! CHECK: store <4 x i32> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <4 x float>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <4 x float> %[[v]] to <4 x i32>
+! LLVMIR: store <4 x i32> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_r4i4
 
 ! CHECK-LABEL: vec_convert_test_r4i8
@@ -2114,19 +1094,9 @@ subroutine vec_convert_test_r4i8(v, mold)
   vector(integer(8)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<4xf32> to vector<2xi64>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<2xi64>) -> !fir.vector<2:i64>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<4xf32> to vector<2xi64>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<2xi64>>
-
-! CHECK: %[[v:.*]] = load <4 x float>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <4 x float> %[[v]] to <2 x i64>
-! CHECK: store <2 x i64> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <4 x float>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <4 x float> %[[v]] to <2 x i64>
+! LLVMIR: store <2 x i64> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_r4i8
 
 ! CHECK-LABEL: vec_convert_test_r4u1
@@ -2135,19 +1105,9 @@ subroutine vec_convert_test_r4u1(v, mold)
   vector(unsigned(1)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<4xf32> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<4xf32> to vector<16xi8>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<16xi8>>
-
-! CHECK: %[[v:.*]] = load <4 x float>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <4 x float> %[[v]] to <16 x i8>
-! CHECK: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <4 x float>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <4 x float> %[[v]] to <16 x i8>
+! LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_r4u1
 
 ! CHECK-LABEL: vec_convert_test_r4u2
@@ -2156,19 +1116,9 @@ subroutine vec_convert_test_r4u2(v, mold)
   vector(unsigned(2)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<4xf32> to vector<8xi16>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<8xi16>) -> !fir.vector<8:ui16>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<4xf32> to vector<8xi16>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<8xi16>>
-
-! CHECK: %[[v:.*]] = load <4 x float>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <4 x float> %[[v]] to <8 x i16>
-! CHECK: store <8 x i16> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <4 x float>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <4 x float> %[[v]] to <8 x i16>
+! LLVMIR: store <8 x i16> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_r4u2
 
 ! CHECK-LABEL: vec_convert_test_r4u4
@@ -2177,19 +1127,9 @@ subroutine vec_convert_test_r4u4(v, mold)
   vector(unsigned(4)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<4xf32> to vector<4xi32>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<4xi32>) -> !fir.vector<4:ui32>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<4xf32> to vector<4xi32>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<4xi32>>
-
-! CHECK: %[[v:.*]] = load <4 x float>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <4 x float> %[[v]] to <4 x i32>
-! CHECK: store <4 x i32> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <4 x float>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <4 x float> %[[v]] to <4 x i32>
+! LLVMIR: store <4 x i32> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_r4u4
 
 ! CHECK-LABEL: vec_convert_test_r4u8
@@ -2198,19 +1138,9 @@ subroutine vec_convert_test_r4u8(v, mold)
   vector(unsigned(8)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<4xf32> to vector<2xi64>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<2xi64>) -> !fir.vector<2:ui64>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<4xf32> to vector<2xi64>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<2xi64>>
-
-! CHECK: %[[v:.*]] = load <4 x float>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <4 x float> %[[v]] to <2 x i64>
-! CHECK: store <2 x i64> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <4 x float>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <4 x float> %[[v]] to <2 x i64>
+! LLVMIR: store <2 x i64> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_r4u8
 
 ! CHECK-LABEL: vec_convert_test_r4r4
@@ -2219,17 +1149,8 @@ subroutine vec_convert_test_r4r4(v, mold)
   vector(real(4)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<4xf32> to vector<4xf32>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: llvm.store %[[v]], %{{.*}} : !llvm.ptr<vector<4xf32>>
-
-! CHECK: %[[v:.*]] = load <4 x float>, ptr %0, align 16
-! CHECK: store <4 x float> %[[v]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <4 x float>, ptr %0, align 16
+! LLVMIR: store <4 x float> %[[v]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_r4r4
 
 ! CHECK-LABEL: vec_convert_test_r4r8
@@ -2238,19 +1159,9 @@ subroutine vec_convert_test_r4r8(v, mold)
   vector(real(8)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<4xf32> to vector<2xf64>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<2xf64>) -> !fir.vector<2:f64>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<4xf32> to vector<2xf64>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<2xf64>>
-
-! CHECK: %[[v:.*]] = load <4 x float>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <4 x float> %[[v]] to <2 x double>
-! CHECK: store <2 x double> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <4 x float>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <4 x float> %[[v]] to <2 x double>
+! LLVMIR: store <2 x double> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_r4r8
 
 ! CHECK-LABEL: vec_convert_test_r8i1
@@ -2259,19 +1170,9 @@ subroutine vec_convert_test_r8i1(v, mold)
   vector(integer(1)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<2xf64> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<16xi8>) -> !fir.vector<16:i8>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<2xf64> to vector<16xi8>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<16xi8>>
-
-! CHECK: %[[v:.*]] = load <2 x double>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <2 x double> %[[v]] to <16 x i8>
-! CHECK: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <2 x double>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <2 x double> %[[v]] to <16 x i8>
+! LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_r8i1
 
 ! CHECK-LABEL: vec_convert_test_r8i2
@@ -2280,19 +1181,9 @@ subroutine vec_convert_test_r8i2(v, mold)
   vector(integer(2)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<2xf64> to vector<8xi16>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<8xi16>) -> !fir.vector<8:i16>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<2xf64> to vector<8xi16>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<8xi16>>
-
-! CHECK: %[[v:.*]] = load <2 x double>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <2 x double> %[[v]] to <8 x i16>
-! CHECK: store <8 x i16> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <2 x double>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <2 x double> %[[v]] to <8 x i16>
+! LLVMIR: store <8 x i16> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_r8i2
 
 ! CHECK-LABEL: vec_convert_test_r8i4
@@ -2301,19 +1192,9 @@ subroutine vec_convert_test_r8i4(v, mold)
   vector(integer(4)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<2xf64> to vector<4xi32>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<2xf64> to vector<4xi32>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<4xi32>>
-
-! CHECK: %[[v:.*]] = load <2 x double>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <2 x double> %[[v]] to <4 x i32>
-! CHECK: store <4 x i32> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <2 x double>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <2 x double> %[[v]] to <4 x i32>
+! LLVMIR: store <4 x i32> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_r8i4
 
 ! CHECK-LABEL: vec_convert_test_r8i8
@@ -2322,19 +1203,9 @@ subroutine vec_convert_test_r8i8(v, mold)
   vector(integer(8)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<2xf64> to vector<2xi64>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<2xi64>) -> !fir.vector<2:i64>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<2xf64> to vector<2xi64>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<2xi64>>
-
-! CHECK: %[[v:.*]] = load <2 x double>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <2 x double> %[[v]] to <2 x i64>
-! CHECK: store <2 x i64> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <2 x double>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <2 x double> %[[v]] to <2 x i64>
+! LLVMIR: store <2 x i64> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_r8i8
 
 ! CHECK-LABEL: vec_convert_test_r8u1
@@ -2343,19 +1214,9 @@ subroutine vec_convert_test_r8u1(v, mold)
   vector(unsigned(1)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<2xf64> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<2xf64> to vector<16xi8>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<16xi8>>
-
-! CHECK: %[[v:.*]] = load <2 x double>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <2 x double> %[[v]] to <16 x i8>
-! CHECK: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <2 x double>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <2 x double> %[[v]] to <16 x i8>
+! LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_r8u1
 
 ! CHECK-LABEL: vec_convert_test_r8u2
@@ -2364,19 +1225,9 @@ subroutine vec_convert_test_r8u2(v, mold)
   vector(unsigned(2)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<2xf64> to vector<8xi16>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<8xi16>) -> !fir.vector<8:ui16>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<2xf64> to vector<8xi16>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<8xi16>>
-
-! CHECK: %[[v:.*]] = load <2 x double>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <2 x double> %[[v]] to <8 x i16>
-! CHECK: store <8 x i16> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <2 x double>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <2 x double> %[[v]] to <8 x i16>
+! LLVMIR: store <8 x i16> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_r8u2
 
 ! CHECK-LABEL: vec_convert_test_r8u4
@@ -2385,19 +1236,9 @@ subroutine vec_convert_test_r8u4(v, mold)
   vector(unsigned(4)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<2xf64> to vector<4xi32>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<4xi32>) -> !fir.vector<4:ui32>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<2xf64> to vector<4xi32>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<4xi32>>
-
-! CHECK: %[[v:.*]] = load <2 x double>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <2 x double> %[[v]] to <4 x i32>
-! CHECK: store <4 x i32> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <2 x double>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <2 x double> %[[v]] to <4 x i32>
+! LLVMIR: store <4 x i32> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_r8u4
 
 ! CHECK-LABEL: vec_convert_test_r8u8
@@ -2406,19 +1247,9 @@ subroutine vec_convert_test_r8u8(v, mold)
   vector(unsigned(8)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<2xf64> to vector<2xi64>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<2xi64>) -> !fir.vector<2:ui64>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<2xf64> to vector<2xi64>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<2xi64>>
-
-! CHECK: %[[v:.*]] = load <2 x double>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <2 x double> %[[v]] to <2 x i64>
-! CHECK: store <2 x i64> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <2 x double>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <2 x double> %[[v]] to <2 x i64>
+! LLVMIR: store <2 x i64> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_r8u8
 
 ! CHECK-LABEL: vec_convert_test_r8r4
@@ -2427,19 +1258,9 @@ subroutine vec_convert_test_r8r4(v, mold)
   vector(real(4)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<2xf64> to vector<4xf32>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<2xf64> to vector<4xf32>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<4xf32>>
-
-! CHECK: %[[v:.*]] = load <2 x double>, ptr %0, align 16
-! CHECK: %[[r:.*]] = bitcast <2 x double> %[[v]] to <4 x float>
-! CHECK: store <4 x float> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <2 x double>, ptr %0, align 16
+! LLVMIR: %[[r:.*]] = bitcast <2 x double> %[[v]] to <4 x float>
+! LLVMIR: store <4 x float> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_r8r4
 
 ! CHECK-LABEL: vec_convert_test_r8r8
@@ -2448,17 +1269,8 @@ subroutine vec_convert_test_r8r8(v, mold)
   vector(real(8)) :: mold, r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[vc:.*]] = fir.convert %[[v]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[vc]] : vector<2xf64> to vector<2xf64>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<2xf64>) -> !fir.vector<2:f64>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: llvm.store %[[v]], %{{.*}} : !llvm.ptr<vector<2xf64>>
-
-! CHECK: %[[v:.*]] = load <2 x double>, ptr %0, align 16
-! CHECK: store <2 x double> %[[v]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <2 x double>, ptr %0, align 16
+! LLVMIR: store <2 x double> %[[v]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_r8r8
 
 ! CHECK-LABEL: vec_convert_test_i1i1_array
@@ -2467,17 +1279,8 @@ subroutine vec_convert_test_i1i1_array(v, mold)
   vector(integer(1)) :: mold(4, 8), r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[cv:.*]] = fir.convert %[[v]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[cv]] : vector<16xi8> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]]  = fir.convert %[[b]] : (vector<16xi8>) -> !fir.vector<16:i8>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: llvm.store %[[v]], %{{.*}} : !llvm.ptr<vector<16xi8>>
-
-! CHECK: %[[v:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: store <16 x i8> %[[v]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: store <16 x i8> %[[v]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_i1i1_array
 
 ! CHECK-LABEL: vec_convert_test_i4r8_array
@@ -2486,19 +1289,9 @@ subroutine vec_convert_test_i4r8_array(v, mold)
   vector(real(8)) :: mold(2, 4, 8), r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[cv:.*]] = fir.convert %[[v]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[cv]] : vector<4xi32> to vector<2xf64>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<2xf64>) -> !fir.vector<2:f64>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<4xi32> to vector<2xf64>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<2xf64>>
-
-! CHECK: %[[v:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[r:.*]] = bitcast <4 x i32> %[[v]] to <2 x double>
-! CHECK: store <2 x double> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[r:.*]] = bitcast <4 x i32> %[[v]] to <2 x double>
+! LLVMIR: store <2 x double> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_i4r8_array
 
 ! CHECK-LABEL: vec_convert_test_u8i2_array
@@ -2507,19 +1300,9 @@ subroutine vec_convert_test_u8i2_array(v, mold)
   vector(integer(2)) :: mold(10), r
   r = vec_convert(v, mold)
 
-! CHECK-FIR: %[[v:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[cv:.*]] = fir.convert %[[v]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %[[b:.*]] = llvm.bitcast %[[cv]] : vector<2xi64> to vector<8xi16>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[b]] : (vector<8xi16>) -> !fir.vector<8:i16>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[v]] : vector<2xi64> to vector<8xi16>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<8xi16>>
-
-! CHECK: %[[v:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[r:.*]] = bitcast <2 x i64> %[[v]] to <8 x i16>
-! CHECK: store <8 x i16> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[v:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[r:.*]] = bitcast <2 x i64> %[[v]] to <8 x i16>
+! LLVMIR: store <8 x i16> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_convert_test_u8i2_array
 
 !---------
@@ -2531,29 +1314,12 @@ subroutine vec_cvf_test_r4r8(arg1)
   vector(real(4)) :: r
   r = vec_cvf(arg1)
 
-! CHECK-FIR: %[[arg:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[carg:.*]] = fir.convert %[[arg]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! CHECK-FIR: %[[call:.*]] = fir.call @llvm.ppc.vsx.xvcvdpsp(%[[carg]]) fastmath<contract> : (vector<2xf64>) -> !fir.vector<4:f32>
-! CHECK-FIR: %[[ccall:.*]] = fir.convert %[[call]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[bfi:.*]] = llvm.bitcast %[[ccall]] : vector<4xf32> to vector<16xi8>
-! CHECK-FIR: %[[sh:.*]] = vector.shuffle %[[bfi]], %[[bfi]] [4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[bif:.*]] = llvm.bitcast %[[sh]] : vector<16xi8> to vector<4xf32>
-! CHECK-FIR: %[[r:.*]] = fir.convert %[[bif]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! CHECK-FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-
-! CHECK-LLVMIR: %[[arg:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %[[call:.*]] = llvm.call @llvm.ppc.vsx.xvcvdpsp(%[[arg]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<2xf64>) -> vector<4xf32>
-! CHECK-LLVMIR: %[[b:.*]] = llvm.bitcast %[[call]] : vector<4xf32> to vector<16xi8>
-! CHECK-LLVMIR: %[[sh:.*]] = llvm.shufflevector %[[b]], %[[b]] [4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11] : vector<16xi8>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.bitcast %[[sh]] : vector<16xi8> to vector<4xf32>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<4xf32>>
-
-! CHECK: %[[arg:.*]] = load <2 x double>, ptr %{{.*}}, align 16
-! CHECK: %[[call:.*]] = call contract <4 x float> @llvm.ppc.vsx.xvcvdpsp(<2 x double> %[[arg]])
-! CHECK: %[[b:.*]] = bitcast <4 x float> %[[call]] to <16 x i8>
-! CHECK: %[[sh:.*]] = shufflevector <16 x i8> %[[b]], <16 x i8> %[[b]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11>
-! CHECK: %[[r:.*]] = bitcast <16 x i8> %[[sh]] to <4 x float>
-! CHECK: store <4 x float> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! LLVMIR: %[[call:.*]] = call contract <4 x float> @llvm.ppc.vsx.xvcvdpsp(<2 x double> %[[arg]])
+! LLVMIR: %[[b:.*]] = bitcast <4 x float> %[[call]] to <16 x i8>
+! LLVMIR: %[[sh:.*]] = shufflevector <16 x i8> %[[b]], <16 x i8> %[[b]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11>
+! LLVMIR: %[[r:.*]] = bitcast <16 x i8> %[[sh]] to <4 x float>
+! LLVMIR: store <4 x float> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_cvf_test_r4r8
 
 ! CHECK-LABEL: vec_cvf_test_r8r4
@@ -2562,27 +1328,11 @@ subroutine vec_cvf_test_r8r4(arg1)
   vector(real(8)) :: r
   r = vec_cvf(arg1)
 
-! CHECK-FIR: %[[arg:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[carg:.*]] = fir.convert %[[arg]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[bfi:.*]] = llvm.bitcast %[[carg]] : vector<4xf32> to vector<16xi8>
-! CHECK-FIR: %[[sh:.*]] = vector.shuffle %[[bfi]], %[[bfi]] [4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[bif:.*]] = llvm.bitcast %[[sh]] : vector<16xi8> to vector<4xf32>
-! CHECK-FIR: %[[call:.*]] = fir.call @llvm.ppc.vsx.xvcvspdp(%[[bif]]) fastmath<contract> : (vector<4xf32>) -> !fir.vector<2:f64>
-! CHECK-FIR: fir.store %[[call]] to %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-
-! CHECK-LLVMIR: %[[arg:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[bfi:.*]] = llvm.bitcast %[[arg]] : vector<4xf32> to vector<16xi8>
-! CHECK-LLVMIR: %[[sh:.*]] = llvm.shufflevector %[[bfi]], %[[bfi]] [4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11] : vector<16xi8>
-! CHECK-LLVMIR: %[[bif:.*]] = llvm.bitcast %[[sh]] : vector<16xi8> to vector<4xf32>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.call @llvm.ppc.vsx.xvcvspdp(%[[bif]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xf32>) -> vector<2xf64>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<2xf64>>
-
-! CHECK: %[[arg:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! CHECK: %[[bfi:.*]] = bitcast <4 x float> %[[arg]] to <16 x i8>
-! CHECK: %[[sh:.*]] = shufflevector <16 x i8> %[[bfi]], <16 x i8> %[[bfi]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11>
-! CHECK: %[[bif:.*]] = bitcast <16 x i8> %[[sh]] to <4 x float>
-! CHECK: %[[r:.*]] = call contract <2 x double> @llvm.ppc.vsx.xvcvspdp(<4 x float> %[[bif]])
-! CHECK: store <2 x double> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! LLVMIR: %[[bfi:.*]] = bitcast <4 x float> %[[arg]] to <16 x i8>
+! LLVMIR: %[[sh:.*]] = shufflevector <16 x i8> %[[bfi]], <16 x i8> %[[bfi]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11>
+! LLVMIR: %[[bif:.*]] = bitcast <16 x i8> %[[sh]] to <4 x float>
+! LLVMIR: %[[r:.*]] = call contract <2 x double> @llvm.ppc.vsx.xvcvspdp(<4 x float> %[[bif]])
+! LLVMIR: store <2 x double> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_cvf_test_r8r4
 
-

diff --git a/flang/test/Lower/PowerPC/ppc-vec-cvf-elem-order.f90 b/flang/test/Lower/PowerPC/ppc-vec-cvf-elem-order.f90
new file mode 100644
index 000000000000000..c5bcefc98559cb3
--- /dev/null
+++ b/flang/test/Lower/PowerPC/ppc-vec-cvf-elem-order.f90
@@ -0,0 +1,24 @@
+! RUN: %flang_fc1 -flang-experimental-hlfir -emit-llvm %s -fno-ppc-native-vector-element-order -triple ppc64le-unknown-linux -o - | FileCheck --check-prefixes="LLVMIR" %s
+! REQUIRES: target=powerpc{{.*}}
+
+! CHECK-LABEL: vec_cvf_test_r4r8
+subroutine vec_cvf_test_r4r8(arg1)
+  vector(real(8)), intent(in) :: arg1
+  vector(real(4)) :: r
+  r = vec_cvf(arg1)
+
+! LLVMIR: %[[arg:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! LLVMIR: %[[call:.*]] = call contract <4 x float> @llvm.ppc.vsx.xvcvdpsp(<2 x double> %[[arg]])
+! LLVMIR: store <4 x float> %[[call]], ptr %{{.*}}, align 16
+end subroutine vec_cvf_test_r4r8
+
+! CHECK-LABEL: vec_cvf_test_r8r4
+subroutine vec_cvf_test_r8r4(arg1)
+  vector(real(4)), intent(in) :: arg1
+  vector(real(8)) :: r
+  r = vec_cvf(arg1)
+
+! LLVMIR: %[[arg:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! LLVMIR: %[[r:.*]] = call contract <2 x double> @llvm.ppc.vsx.xvcvspdp(<4 x float> %[[arg]])
+! LLVMIR: store <2 x double> %[[r]], ptr %{{.*}}, align 16
+end subroutine vec_cvf_test_r8r4

diff --git a/flang/test/Lower/PowerPC/ppc-vec-extract-elem-order.f90 b/flang/test/Lower/PowerPC/ppc-vec-extract-elem-order.f90
index 1feb32397f48b18..73669c25b339e20 100644
--- a/flang/test/Lower/PowerPC/ppc-vec-extract-elem-order.f90
+++ b/flang/test/Lower/PowerPC/ppc-vec-extract-elem-order.f90
@@ -1,5 +1,4 @@
-! RUN: %flang_fc1 -emit-fir %s -fno-ppc-native-vector-element-order -triple ppc64le-unknown-linux -o - | FileCheck --check-prefixes="FIR" %s
-! RUN: %flang_fc1 -emit-llvm %s -fno-ppc-native-vector-element-order -triple ppc64le-unknown-linux -o - | FileCheck --check-prefixes="LLVMIR" %s
+! RUN: %flang_fc1 -flang-experimental-hlfir -emit-llvm %s -fno-ppc-native-vector-element-order -triple ppc64le-unknown-linux -o - | FileCheck --check-prefixes="LLVMIR" %s
 ! REQUIRES: target=powerpc{{.*}}
 
 !CHECK-LABEL: vec_extract_testr4i8
@@ -9,16 +8,6 @@ subroutine vec_extract_testr4i8(arg1, arg2, r)
   integer(8) :: arg2
   r = vec_extract(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<!fir.vector<4:f32>>
-! FIR: %[[arg2:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i64>
-! FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! FIR: %[[c:.*]] = arith.constant 4 : i64
-! FIR: %[[urem:.*]] = llvm.urem %[[arg2]], %[[c]] : i64
-! FIR: %[[c2:.*]] = arith.constant 3 : i64
-! FIR: %[[sub:.*]] = llvm.sub %[[c2]], %[[urem]] : i64
-! FIR: %[[ext:.*]] = vector.extractelement %[[varg1]][%[[sub]] : i64] : vector<4xf32>
-! FIR: fir.store %[[ext]] to %arg2 : !fir.ref<f32>
-
 ! LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
 ! LLVMIR: %[[arg2:.*]] = load i64, ptr %{{[0-9]}}, align 8
 ! LLVMIR: %[[urem:.*]] = urem i64 %[[arg2]], 4
@@ -34,16 +23,6 @@ subroutine vec_extract_testi8i1(arg1, arg2, r)
   integer(1) :: arg2
   r = vec_extract(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<!fir.vector<2:i64>>
-! FIR: %[[arg2:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i8>
-! FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! FIR: %[[c:.*]] = arith.constant 2 : i8
-! FIR: %[[urem:.*]] = llvm.urem %[[arg2]], %[[c]]  : i8
-! FIR: %[[c2:.*]] = arith.constant 1 : i8
-! FIR: %[[sub:.*]] = llvm.sub %[[c2]], %[[urem]] : i8
-! FIR: %[[ext:.*]] = vector.extractelement %[[varg1]][%[[sub]] : i8] : vector<2xi64>
-! FIR: fir.store %[[ext]] to %arg2 : !fir.ref<i64>
-
 ! LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
 ! LLVMIR: %[[arg2:.*]] = load i8, ptr %{{[0-9]}}, align 1
 ! LLVMIR: %[[urem:.*]] = urem i8 %[[arg2]], 2

diff --git a/flang/test/Lower/PowerPC/ppc-vec-extract.f90 b/flang/test/Lower/PowerPC/ppc-vec-extract.f90
index 78405d5a535f734..1930c8b79d837c1 100644
--- a/flang/test/Lower/PowerPC/ppc-vec-extract.f90
+++ b/flang/test/Lower/PowerPC/ppc-vec-extract.f90
@@ -1,6 +1,4 @@
-! RUN: bbc -emit-fir %s -o - | FileCheck --check-prefixes="CHECK-FIR" %s
-! RUN: %flang_fc1 -emit-fir %s -o - | fir-opt --fir-to-llvm-ir | FileCheck --check-prefixes="CHECK-LLVMIR" %s
-! RUN: %flang_fc1 -emit-llvm %s -o - | FileCheck --check-prefixes="CHECK" %s
+! RUN: %flang_fc1 -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes="LLVMIR" %s
 ! REQUIRES: target=powerpc{{.*}}
 
 !-------------
@@ -15,92 +13,36 @@ subroutine vec_extract_testf32(x, i1, i2, i4, i8)
   integer(4) :: i4
   integer(8) :: i8
   r = vec_extract(x, i1)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[i1:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i8>
-! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[c:.*]] = arith.constant 4 : i8
-! CHECK-FIR: %[[u:.*]] = llvm.urem %[[i1]], %[[c]]  : i8
-! CHECK-FIR: %[[r:.*]] = vector.extractelement %[[vr]][%[[u]] : i8] : vector<4xf32>
-! CHECK-FIR: fir.store %[[r]] to %{{[0-9]}} : !fir.ref<f32>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[i1:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(4 : i8) : i8
-! CHECK-LLVMIR: %[[u:.*]] = llvm.urem %[[i1]], %[[c]]  : i8
-! CHECK-LLVMIR: %[[r:.*]] = llvm.extractelement %[[x]][%[[u]] : i8] : vector<4xf32>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr<f32>
-
-! CHECK: %[[x:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[i1:.*]] = load i8, ptr %{{[0-9]}}, align 1
-! CHECK: %[[u:.*]] = urem i8 %[[i1]], 4
-! CHECK: %[[r:.*]] = extractelement <4 x float> %[[x]], i8 %[[u]]
-! CHECK: store float %[[r]], ptr %{{[0-9]}}, align 4
+
+! LLVMIR: %[[x:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[i1:.*]] = load i8, ptr %{{[0-9]}}, align 1
+! LLVMIR: %[[u:.*]] = urem i8 %[[i1]], 4
+! LLVMIR: %[[r:.*]] = extractelement <4 x float> %[[x]], i8 %[[u]]
+! LLVMIR: store float %[[r]], ptr %{{[0-9]}}, align 4
 
   r = vec_extract(x, i2)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[i2:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i16>
-! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[c:.*]] = arith.constant 4 : i16
-! CHECK-FIR: %[[u:.*]] = llvm.urem %[[i2]], %[[c]]  : i16
-! CHECK-FIR: %[[r:.*]] = vector.extractelement %[[vr]][%[[u]] : i16] : vector<4xf32>
-! CHECK-FIR: fir.store %[[r]] to %{{[0-9]}} : !fir.ref<f32>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[i2:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i16>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(4 : i16) : i16
-! CHECK-LLVMIR: %[[u:.*]] = llvm.urem %[[i2]], %[[c]]  : i16
-! CHECK-LLVMIR: %[[r:.*]] = llvm.extractelement %[[x]][%[[u]] : i16] : vector<4xf32>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr<f32>
-
-! CHECK: %[[x:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[i2:.*]] = load i16, ptr %{{[0-9]}}, align 2
-! CHECK: %[[u:.*]] = urem i16 %[[i2]], 4
-! CHECK: %[[r:.*]] = extractelement <4 x float> %[[x]], i16 %[[u]]
-! CHECK: store float %[[r]], ptr %{{[0-9]}}, align 4
+
+! LLVMIR: %[[x:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[i2:.*]] = load i16, ptr %{{[0-9]}}, align 2
+! LLVMIR: %[[u:.*]] = urem i16 %[[i2]], 4
+! LLVMIR: %[[r:.*]] = extractelement <4 x float> %[[x]], i16 %[[u]]
+! LLVMIR: store float %[[r]], ptr %{{[0-9]}}, align 4
 
   r = vec_extract(x, i4)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[i4:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i32>
-! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[c:.*]] = arith.constant 4 : i32
-! CHECK-FIR: %[[u:.*]] = llvm.urem %[[i4]], %[[c]]  : i32
-! CHECK-FIR: %[[r:.*]] = vector.extractelement %[[vr]][%[[u]] : i32] : vector<4xf32>
-! CHECK-FIR: fir.store %[[r]] to %{{[0-9]}} : !fir.ref<f32>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[i4:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i32>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(4 : i32) : i32
-! CHECK-LLVMIR: %[[u:.*]] = llvm.urem %[[i4]], %[[c]]  : i32
-! CHECK-LLVMIR: %[[r:.*]] = llvm.extractelement %[[x]][%[[u]] : i32] : vector<4xf32>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr<f32>
-
-! CHECK: %[[x:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[i4:.*]] = load i32, ptr %{{[0-9]}}, align 4
-! CHECK: %[[u:.*]] = urem i32 %[[i4]], 4
-! CHECK: %[[r:.*]] = extractelement <4 x float> %[[x]], i32 %[[u]]
-! CHECK: store float %[[r]], ptr %{{[0-9]}}, align 4
+
+! LLVMIR: %[[x:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[i4:.*]] = load i32, ptr %{{[0-9]}}, align 4
+! LLVMIR: %[[u:.*]] = urem i32 %[[i4]], 4
+! LLVMIR: %[[r:.*]] = extractelement <4 x float> %[[x]], i32 %[[u]]
+! LLVMIR: store float %[[r]], ptr %{{[0-9]}}, align 4
 
   r = vec_extract(x, i8)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[i8:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i64>
-! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[c:.*]] = arith.constant 4 : i64
-! CHECK-FIR: %[[u:.*]] = llvm.urem %[[i8]], %[[c]]  : i64
-! CHECK-FIR: %[[r:.*]] = vector.extractelement %[[vr]][%[[u]] : i64] : vector<4xf32>
-! CHECK-FIR: fir.store %[[r]] to %{{[0-9]}} : !fir.ref<f32>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[i8:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i64>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(4 : i64) : i64
-! CHECK-LLVMIR: %[[u:.*]] = llvm.urem %[[i8]], %[[c]]  : i64
-! CHECK-LLVMIR: %[[r:.*]] = llvm.extractelement %[[x]][%[[u]] : i64] : vector<4xf32>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr<f32>
-
-! CHECK: %[[x:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[i8:.*]] = load i64, ptr %{{[0-9]}}, align 8
-! CHECK: %[[u:.*]] = urem i64 %[[i8]], 4
-! CHECK: %[[r:.*]] = extractelement <4 x float> %[[x]], i64 %[[u]]
-! CHECK: store float %[[r]], ptr %{{[0-9]}}, align 4
+
+! LLVMIR: %[[x:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[i8:.*]] = load i64, ptr %{{[0-9]}}, align 8
+! LLVMIR: %[[u:.*]] = urem i64 %[[i8]], 4
+! LLVMIR: %[[r:.*]] = extractelement <4 x float> %[[x]], i64 %[[u]]
+! LLVMIR: store float %[[r]], ptr %{{[0-9]}}, align 4
 end subroutine vec_extract_testf32
 
 ! CHECK-LABEL: vec_extract_testf64
@@ -112,92 +54,36 @@ subroutine vec_extract_testf64(x, i1, i2, i4, i8)
   integer(4) :: i4
   integer(8) :: i8
   r = vec_extract(x, i1)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[i1:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i8>
-! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! CHECK-FIR: %[[c:.*]] = arith.constant 2 : i8
-! CHECK-FIR: %[[u:.*]] = llvm.urem %[[i1]], %[[c]]  : i8
-! CHECK-FIR: %[[r:.*]] = vector.extractelement %[[vr]][%[[u]] : i8] : vector<2xf64>
-! CHECK-FIR: fir.store %[[r]] to %{{[0-9]}} : !fir.ref<f64>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %[[i1:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(2 : i8) : i8
-! CHECK-LLVMIR: %[[u:.*]] = llvm.urem %[[i1]], %[[c]]  : i8
-! CHECK-LLVMIR: %[[r:.*]] = llvm.extractelement %[[x]][%[[u]] : i8] : vector<2xf64>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr<f64>
-
-! CHECK: %[[x:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[i1:.*]] = load i8, ptr %{{[0-9]}}, align 1
-! CHECK: %[[u:.*]] = urem i8 %[[i1]], 2
-! CHECK: %[[r:.*]] = extractelement <2 x double> %[[x]], i8 %[[u]]
-! CHECK: store double %[[r]], ptr %{{[0-9]}}, align 8
+
+! LLVMIR: %[[x:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[i1:.*]] = load i8, ptr %{{[0-9]}}, align 1
+! LLVMIR: %[[u:.*]] = urem i8 %[[i1]], 2
+! LLVMIR: %[[r:.*]] = extractelement <2 x double> %[[x]], i8 %[[u]]
+! LLVMIR: store double %[[r]], ptr %{{[0-9]}}, align 8
 
   r = vec_extract(x, i2)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[i2:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i16>
-! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! CHECK-FIR: %[[c:.*]] = arith.constant 2 : i16
-! CHECK-FIR: %[[u:.*]] = llvm.urem %[[i2]], %[[c]]  : i16
-! CHECK-FIR: %[[r:.*]] = vector.extractelement %[[vr]][%[[u]] : i16] : vector<2xf64>
-! CHECK-FIR: fir.store %[[r]] to %{{[0-9]}} : !fir.ref<f64>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %[[i2:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i16>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(2 : i16) : i16
-! CHECK-LLVMIR: %[[u:.*]] = llvm.urem %[[i2]], %[[c]]  : i16
-! CHECK-LLVMIR: %[[r:.*]] = llvm.extractelement %[[x]][%[[u]] : i16] : vector<2xf64>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr<f64>
-
-! CHECK: %[[x:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[i2:.*]] = load i16, ptr %{{[0-9]}}, align 2
-! CHECK: %[[u:.*]] = urem i16 %[[i2]], 2
-! CHECK: %[[r:.*]] = extractelement <2 x double> %[[x]], i16 %[[u]]
-! CHECK: store double %[[r]], ptr %{{[0-9]}}, align 8
+
+! LLVMIR: %[[x:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[i2:.*]] = load i16, ptr %{{[0-9]}}, align 2
+! LLVMIR: %[[u:.*]] = urem i16 %[[i2]], 2
+! LLVMIR: %[[r:.*]] = extractelement <2 x double> %[[x]], i16 %[[u]]
+! LLVMIR: store double %[[r]], ptr %{{[0-9]}}, align 8
 
   r = vec_extract(x, i4)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[i4:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i32>
-! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! CHECK-FIR: %[[c:.*]] = arith.constant 2 : i32
-! CHECK-FIR: %[[u:.*]] = llvm.urem %[[i4]], %[[c]]  : i32
-! CHECK-FIR: %[[r:.*]] = vector.extractelement %[[vr]][%[[u]] : i32] : vector<2xf64>
-! CHECK-FIR: fir.store %[[r]] to %{{[0-9]}} : !fir.ref<f64>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %[[i4:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i32>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(2 : i32) : i32
-! CHECK-LLVMIR: %[[u:.*]] = llvm.urem %[[i4]], %[[c]]  : i32
-! CHECK-LLVMIR: %[[r:.*]] = llvm.extractelement %[[x]][%[[u]] : i32] : vector<2xf64>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr<f64>
-
-! CHECK: %[[x:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[i4:.*]] = load i32, ptr %{{[0-9]}}, align 4
-! CHECK: %[[u:.*]] = urem i32 %[[i4]], 2
-! CHECK: %[[r:.*]] = extractelement <2 x double> %[[x]], i32 %[[u]]
-! CHECK: store double %[[r]], ptr %{{[0-9]}}, align 8
+
+! LLVMIR: %[[x:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[i4:.*]] = load i32, ptr %{{[0-9]}}, align 4
+! LLVMIR: %[[u:.*]] = urem i32 %[[i4]], 2
+! LLVMIR: %[[r:.*]] = extractelement <2 x double> %[[x]], i32 %[[u]]
+! LLVMIR: store double %[[r]], ptr %{{[0-9]}}, align 8
 
   r = vec_extract(x, i8)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[i8:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i64>
-! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! CHECK-FIR: %[[c:.*]] = arith.constant 2 : i64
-! CHECK-FIR: %[[u:.*]] = llvm.urem %[[i8]], %[[c]]  : i64
-! CHECK-FIR: %[[r:.*]] = vector.extractelement %[[vr]][%[[u]] : i64] : vector<2xf64>
-! CHECK-FIR: fir.store %[[r]] to %{{[0-9]}} : !fir.ref<f64>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %[[i8:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i64>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(2 : i64) : i64
-! CHECK-LLVMIR: %[[u:.*]] = llvm.urem %[[i8]], %[[c]]  : i64
-! CHECK-LLVMIR: %[[r:.*]] = llvm.extractelement %[[x]][%[[u]] : i64] : vector<2xf64>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr<f64>
-
-! CHECK: %[[x:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[i8:.*]] = load i64, ptr %{{[0-9]}}, align 8
-! CHECK: %[[u:.*]] = urem i64 %[[i8]], 2
-! CHECK: %[[r:.*]] = extractelement <2 x double> %[[x]], i64 %[[u]]
-! CHECK: store double %[[r]], ptr %{{[0-9]}}, align 8
+
+! LLVMIR: %[[x:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[i8:.*]] = load i64, ptr %{{[0-9]}}, align 8
+! LLVMIR: %[[u:.*]] = urem i64 %[[i8]], 2
+! LLVMIR: %[[r:.*]] = extractelement <2 x double> %[[x]], i64 %[[u]]
+! LLVMIR: store double %[[r]], ptr %{{[0-9]}}, align 8
 end subroutine vec_extract_testf64
 
 ! CHECK-LABEL: vec_extract_testi8
@@ -209,92 +95,36 @@ subroutine vec_extract_testi8(x, i1, i2, i4, i8)
   integer(4) :: i4
   integer(8) :: i8
   r = vec_extract(x, i1)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[i1:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i8>
-! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[c:.*]] = arith.constant 16 : i8
-! CHECK-FIR: %[[u:.*]] = llvm.urem %[[i1]], %[[c]]  : i8
-! CHECK-FIR: %[[r:.*]] = vector.extractelement %[[vr]][%[[u]] : i8] : vector<16xi8>
-! CHECK-FIR: fir.store %[[r]] to %{{[0-9]}} : !fir.ref<i8>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[i1:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(16 : i8) : i8
-! CHECK-LLVMIR: %[[u:.*]] = llvm.urem %[[i1]], %[[c]]  : i8
-! CHECK-LLVMIR: %[[r:.*]] = llvm.extractelement %[[x]][%[[u]] : i8] : vector<16xi8>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr<i8>
-
-! CHECK: %[[x:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[i1:.*]] = load i8, ptr %{{[0-9]}}, align 1
-! CHECK: %[[u:.*]] = urem i8 %[[i1]], 16
-! CHECK: %[[r:.*]] = extractelement <16 x i8> %[[x]], i8 %[[u]]
-! CHECK: store i8 %[[r]], ptr %{{[0-9]}}, align 1
+
+! LLVMIR: %[[x:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[i1:.*]] = load i8, ptr %{{[0-9]}}, align 1
+! LLVMIR: %[[u:.*]] = urem i8 %[[i1]], 16
+! LLVMIR: %[[r:.*]] = extractelement <16 x i8> %[[x]], i8 %[[u]]
+! LLVMIR: store i8 %[[r]], ptr %{{[0-9]}}, align 1
 
   r = vec_extract(x, i2)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[i2:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i16>
-! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[c:.*]] = arith.constant 16 : i16
-! CHECK-FIR: %[[u:.*]] = llvm.urem %[[i2]], %[[c]]  : i16
-! CHECK-FIR: %[[r:.*]] = vector.extractelement %[[vr]][%[[u]] : i16] : vector<16xi8>
-! CHECK-FIR: fir.store %[[r]] to %{{[0-9]}} : !fir.ref<i8>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[i2:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i16>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(16 : i16) : i16
-! CHECK-LLVMIR: %[[u:.*]] = llvm.urem %[[i2]], %[[c]]  : i16
-! CHECK-LLVMIR: %[[r:.*]] = llvm.extractelement %[[x]][%[[u]] : i16] : vector<16xi8>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr<i8>
-
-! CHECK: %[[x:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[i2:.*]] = load i16, ptr %{{[0-9]}}, align 2
-! CHECK: %[[u:.*]] = urem i16 %[[i2]], 16
-! CHECK: %[[r:.*]] = extractelement <16 x i8> %[[x]], i16 %[[u]]
-! CHECK: store i8 %[[r]], ptr %{{[0-9]}}, align 1
+
+! LLVMIR: %[[x:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[i2:.*]] = load i16, ptr %{{[0-9]}}, align 2
+! LLVMIR: %[[u:.*]] = urem i16 %[[i2]], 16
+! LLVMIR: %[[r:.*]] = extractelement <16 x i8> %[[x]], i16 %[[u]]
+! LLVMIR: store i8 %[[r]], ptr %{{[0-9]}}, align 1
 
   r = vec_extract(x, i4)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[i4:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i32>
-! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[c:.*]] = arith.constant 16 : i32
-! CHECK-FIR: %[[u:.*]] = llvm.urem %[[i4]], %[[c]]  : i32
-! CHECK-FIR: %[[r:.*]] = vector.extractelement %[[vr]][%[[u]] : i32] : vector<16xi8>
-! CHECK-FIR: fir.store %[[r]] to %{{[0-9]}} : !fir.ref<i8>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[i4:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i32>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(16 : i32) : i32
-! CHECK-LLVMIR: %[[u:.*]] = llvm.urem %[[i4]], %[[c]]  : i32
-! CHECK-LLVMIR: %[[r:.*]] = llvm.extractelement %[[x]][%[[u]] : i32] : vector<16xi8>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr<i8>
-
-! CHECK: %[[x:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[i4:.*]] = load i32, ptr %{{[0-9]}}, align 4
-! CHECK: %[[u:.*]] = urem i32 %[[i4]], 16
-! CHECK: %[[r:.*]] = extractelement <16 x i8> %[[x]], i32 %[[u]]
-! CHECK: store i8 %[[r]], ptr %{{[0-9]}}, align 1
+
+! LLVMIR: %[[x:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[i4:.*]] = load i32, ptr %{{[0-9]}}, align 4
+! LLVMIR: %[[u:.*]] = urem i32 %[[i4]], 16
+! LLVMIR: %[[r:.*]] = extractelement <16 x i8> %[[x]], i32 %[[u]]
+! LLVMIR: store i8 %[[r]], ptr %{{[0-9]}}, align 1
 
   r = vec_extract(x, i8)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[i8:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i64>
-! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[c:.*]] = arith.constant 16 : i64
-! CHECK-FIR: %[[u:.*]] = llvm.urem %[[i8]], %[[c]]  : i64
-! CHECK-FIR: %[[r:.*]] = vector.extractelement %[[vr]][%[[u]] : i64] : vector<16xi8>
-! CHECK-FIR: fir.store %[[r]] to %{{[0-9]}} : !fir.ref<i8>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[i8:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i64>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(16 : i64) : i64
-! CHECK-LLVMIR: %[[u:.*]] = llvm.urem %[[i8]], %[[c]]  : i64
-! CHECK-LLVMIR: %[[r:.*]] = llvm.extractelement %[[x]][%[[u]] : i64] : vector<16xi8>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr<i8>
-
-! CHECK: %[[x:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[i8:.*]] = load i64, ptr %{{[0-9]}}, align 8
-! CHECK: %[[u:.*]] = urem i64 %[[i8]], 16
-! CHECK: %[[r:.*]] = extractelement <16 x i8> %[[x]], i64 %[[u]]
-! CHECK: store i8 %[[r]], ptr %{{[0-9]}}, align 1
+
+! LLVMIR: %[[x:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[i8:.*]] = load i64, ptr %{{[0-9]}}, align 8
+! LLVMIR: %[[u:.*]] = urem i64 %[[i8]], 16
+! LLVMIR: %[[r:.*]] = extractelement <16 x i8> %[[x]], i64 %[[u]]
+! LLVMIR: store i8 %[[r]], ptr %{{[0-9]}}, align 1
 end subroutine vec_extract_testi8
 
 ! CHECK-LABEL: vec_extract_testi16
@@ -306,92 +136,36 @@ subroutine vec_extract_testi16(x, i1, i2, i4, i8)
   integer(4) :: i4
   integer(8) :: i8
   r = vec_extract(x, i1)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[i1:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i8>
-! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[c:.*]] = arith.constant 8 : i8
-! CHECK-FIR: %[[u:.*]] = llvm.urem %[[i1]], %[[c]]  : i8
-! CHECK-FIR: %[[r:.*]] = vector.extractelement %[[vr]][%[[u]] : i8] : vector<8xi16>
-! CHECK-FIR: fir.store %[[r]] to %{{[0-9]}} : !fir.ref<i16>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[i1:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(8 : i8) : i8
-! CHECK-LLVMIR: %[[u:.*]] = llvm.urem %[[i1]], %[[c]]  : i8
-! CHECK-LLVMIR: %[[r:.*]] = llvm.extractelement %[[x]][%[[u]] : i8] : vector<8xi16>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr<i16>
-
-! CHECK: %[[x:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[i1:.*]] = load i8, ptr %{{[0-9]}}, align 1
-! CHECK: %[[u:.*]] = urem i8 %[[i1]], 8
-! CHECK: %[[r:.*]] = extractelement <8 x i16> %[[x]], i8 %[[u]]
-! CHECK: store i16 %[[r]], ptr %{{[0-9]}}, align 2
+
+! LLVMIR: %[[x:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[i1:.*]] = load i8, ptr %{{[0-9]}}, align 1
+! LLVMIR: %[[u:.*]] = urem i8 %[[i1]], 8
+! LLVMIR: %[[r:.*]] = extractelement <8 x i16> %[[x]], i8 %[[u]]
+! LLVMIR: store i16 %[[r]], ptr %{{[0-9]}}, align 2
 
   r = vec_extract(x, i2)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[i2:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i16>
-! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[c:.*]] = arith.constant 8 : i16
-! CHECK-FIR: %[[u:.*]] = llvm.urem %[[i2]], %[[c]]  : i16
-! CHECK-FIR: %[[r:.*]] = vector.extractelement %[[vr]][%[[u]] : i16] : vector<8xi16>
-! CHECK-FIR: fir.store %[[r]] to %{{[0-9]}} : !fir.ref<i16>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[i2:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i16>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(8 : i16) : i16
-! CHECK-LLVMIR: %[[u:.*]] = llvm.urem %[[i2]], %[[c]]  : i16
-! CHECK-LLVMIR: %[[r:.*]] = llvm.extractelement %[[x]][%[[u]] : i16] : vector<8xi16>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr<i16>
-
-! CHECK: %[[x:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[i2:.*]] = load i16, ptr %{{[0-9]}}, align 2
-! CHECK: %[[u:.*]] = urem i16 %[[i2]], 8
-! CHECK: %[[r:.*]] = extractelement <8 x i16> %[[x]], i16 %[[u]]
-! CHECK: store i16 %[[r]], ptr %{{[0-9]}}, align 2
+
+! LLVMIR: %[[x:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[i2:.*]] = load i16, ptr %{{[0-9]}}, align 2
+! LLVMIR: %[[u:.*]] = urem i16 %[[i2]], 8
+! LLVMIR: %[[r:.*]] = extractelement <8 x i16> %[[x]], i16 %[[u]]
+! LLVMIR: store i16 %[[r]], ptr %{{[0-9]}}, align 2
 
   r = vec_extract(x, i4)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[i4:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i32>
-! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[c:.*]] = arith.constant 8 : i32
-! CHECK-FIR: %[[u:.*]] = llvm.urem %[[i4]], %[[c]]  : i32
-! CHECK-FIR: %[[r:.*]] = vector.extractelement %[[vr]][%[[u]] : i32] : vector<8xi16>
-! CHECK-FIR: fir.store %[[r]] to %{{[0-9]}} : !fir.ref<i16>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[i4:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i32>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(8 : i32) : i32
-! CHECK-LLVMIR: %[[u:.*]] = llvm.urem %[[i4]], %[[c]]  : i32
-! CHECK-LLVMIR: %[[r:.*]] = llvm.extractelement %[[x]][%[[u]] : i32] : vector<8xi16>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr<i16>
-
-! CHECK: %[[x:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[i4:.*]] = load i32, ptr %{{[0-9]}}, align 4
-! CHECK: %[[u:.*]] = urem i32 %[[i4]], 8
-! CHECK: %[[r:.*]] = extractelement <8 x i16> %[[x]], i32 %[[u]]
-! CHECK: store i16 %[[r]], ptr %{{[0-9]}}, align 2
+
+! LLVMIR: %[[x:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[i4:.*]] = load i32, ptr %{{[0-9]}}, align 4
+! LLVMIR: %[[u:.*]] = urem i32 %[[i4]], 8
+! LLVMIR: %[[r:.*]] = extractelement <8 x i16> %[[x]], i32 %[[u]]
+! LLVMIR: store i16 %[[r]], ptr %{{[0-9]}}, align 2
 
   r = vec_extract(x, i8)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[i8:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i64>
-! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[c:.*]] = arith.constant 8 : i64
-! CHECK-FIR: %[[u:.*]] = llvm.urem %[[i8]], %[[c]]  : i64
-! CHECK-FIR: %[[r:.*]] = vector.extractelement %[[vr]][%[[u]] : i64] : vector<8xi16>
-! CHECK-FIR: fir.store %[[r]] to %{{[0-9]}} : !fir.ref<i16>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[i8:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i64>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(8 : i64) : i64
-! CHECK-LLVMIR: %[[u:.*]] = llvm.urem %[[i8]], %[[c]]  : i64
-! CHECK-LLVMIR: %[[r:.*]] = llvm.extractelement %[[x]][%[[u]] : i64] : vector<8xi16>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr<i16>
-
-! CHECK: %[[x:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[i8:.*]] = load i64, ptr %{{[0-9]}}, align 8
-! CHECK: %[[u:.*]] = urem i64 %[[i8]], 8
-! CHECK: %[[r:.*]] = extractelement <8 x i16> %[[x]], i64 %[[u]]
-! CHECK: store i16 %[[r]], ptr %{{[0-9]}}, align 2
+
+! LLVMIR: %[[x:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[i8:.*]] = load i64, ptr %{{[0-9]}}, align 8
+! LLVMIR: %[[u:.*]] = urem i64 %[[i8]], 8
+! LLVMIR: %[[r:.*]] = extractelement <8 x i16> %[[x]], i64 %[[u]]
+! LLVMIR: store i16 %[[r]], ptr %{{[0-9]}}, align 2
 end subroutine vec_extract_testi16
 
 ! CHECK-LABEL: vec_extract_testi32
@@ -403,92 +177,36 @@ subroutine vec_extract_testi32(x, i1, i2, i4, i8)
   integer(4) :: i4
   integer(8) :: i8
   r = vec_extract(x, i1)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[i1:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i8>
-! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[c:.*]] = arith.constant 4 : i8
-! CHECK-FIR: %[[u:.*]] = llvm.urem %[[i1]], %[[c]]  : i8
-! CHECK-FIR: %[[r:.*]] = vector.extractelement %[[vr]][%[[u]] : i8] : vector<4xi32>
-! CHECK-FIR: fir.store %[[r]] to %{{[0-9]}} : !fir.ref<i32>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[i1:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(4 : i8) : i8
-! CHECK-LLVMIR: %[[u:.*]] = llvm.urem %[[i1]], %[[c]]  : i8
-! CHECK-LLVMIR: %[[r:.*]] = llvm.extractelement %[[x]][%[[u]] : i8] : vector<4xi32>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr<i32>
-
-! CHECK: %[[x:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[i1:.*]] = load i8, ptr %{{[0-9]}}, align 1
-! CHECK: %[[u:.*]] = urem i8 %[[i1]], 4
-! CHECK: %[[r:.*]] = extractelement <4 x i32> %[[x]], i8 %[[u]]
-! CHECK: store i32 %[[r]], ptr %{{[0-9]}}, align 4
+
+! LLVMIR: %[[x:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[i1:.*]] = load i8, ptr %{{[0-9]}}, align 1
+! LLVMIR: %[[u:.*]] = urem i8 %[[i1]], 4
+! LLVMIR: %[[r:.*]] = extractelement <4 x i32> %[[x]], i8 %[[u]]
+! LLVMIR: store i32 %[[r]], ptr %{{[0-9]}}, align 4
 
   r = vec_extract(x, i2)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[i2:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i16>
-! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[c:.*]] = arith.constant 4 : i16
-! CHECK-FIR: %[[u:.*]] = llvm.urem %[[i2]], %[[c]]  : i16
-! CHECK-FIR: %[[r:.*]] = vector.extractelement %[[vr]][%[[u]] : i16] : vector<4xi32>
-! CHECK-FIR: fir.store %[[r]] to %{{[0-9]}} : !fir.ref<i32>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[i2:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i16>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(4 : i16) : i16
-! CHECK-LLVMIR: %[[u:.*]] = llvm.urem %[[i2]], %[[c]]  : i16
-! CHECK-LLVMIR: %[[r:.*]] = llvm.extractelement %[[x]][%[[u]] : i16] : vector<4xi32>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr<i32>
-
-! CHECK: %[[x:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[i2:.*]] = load i16, ptr %{{[0-9]}}, align 2
-! CHECK: %[[u:.*]] = urem i16 %[[i2]], 4
-! CHECK: %[[r:.*]] = extractelement <4 x i32> %[[x]], i16 %[[u]]
-! CHECK: store i32 %[[r]], ptr %{{[0-9]}}, align 4
+
+! LLVMIR: %[[x:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[i2:.*]] = load i16, ptr %{{[0-9]}}, align 2
+! LLVMIR: %[[u:.*]] = urem i16 %[[i2]], 4
+! LLVMIR: %[[r:.*]] = extractelement <4 x i32> %[[x]], i16 %[[u]]
+! LLVMIR: store i32 %[[r]], ptr %{{[0-9]}}, align 4
 
   r = vec_extract(x, i4)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[i4:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i32>
-! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[c:.*]] = arith.constant 4 : i32
-! CHECK-FIR: %[[u:.*]] = llvm.urem %[[i4]], %[[c]]  : i32
-! CHECK-FIR: %[[r:.*]] = vector.extractelement %[[vr]][%[[u]] : i32] : vector<4xi32>
-! CHECK-FIR: fir.store %[[r]] to %{{[0-9]}} : !fir.ref<i32>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[i4:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i32>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(4 : i32) : i32
-! CHECK-LLVMIR: %[[u:.*]] = llvm.urem %[[i4]], %[[c]]  : i32
-! CHECK-LLVMIR: %[[r:.*]] = llvm.extractelement %[[x]][%[[u]] : i32] : vector<4xi32>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr<i32>
-
-! CHECK: %[[x:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[i4:.*]] = load i32, ptr %{{[0-9]}}, align 4
-! CHECK: %[[u:.*]] = urem i32 %[[i4]], 4
-! CHECK: %[[r:.*]] = extractelement <4 x i32> %[[x]], i32 %[[u]]
-! CHECK: store i32 %[[r]], ptr %{{[0-9]}}, align 4
+
+! LLVMIR: %[[x:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[i4:.*]] = load i32, ptr %{{[0-9]}}, align 4
+! LLVMIR: %[[u:.*]] = urem i32 %[[i4]], 4
+! LLVMIR: %[[r:.*]] = extractelement <4 x i32> %[[x]], i32 %[[u]]
+! LLVMIR: store i32 %[[r]], ptr %{{[0-9]}}, align 4
 
   r = vec_extract(x, i8)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[i8:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i64>
-! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[c:.*]] = arith.constant 4 : i64
-! CHECK-FIR: %[[u:.*]] = llvm.urem %[[i8]], %[[c]]  : i64
-! CHECK-FIR: %[[r:.*]] = vector.extractelement %[[vr]][%[[u]] : i64] : vector<4xi32>
-! CHECK-FIR: fir.store %[[r]] to %{{[0-9]}} : !fir.ref<i32>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[i8:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i64>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(4 : i64) : i64
-! CHECK-LLVMIR: %[[u:.*]] = llvm.urem %[[i8]], %[[c]]  : i64
-! CHECK-LLVMIR: %[[r:.*]] = llvm.extractelement %[[x]][%[[u]] : i64] : vector<4xi32>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr<i32>
-
-! CHECK: %[[x:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[i8:.*]] = load i64, ptr %{{[0-9]}}, align 8
-! CHECK: %[[u:.*]] = urem i64 %[[i8]], 4
-! CHECK: %[[r:.*]] = extractelement <4 x i32> %[[x]], i64 %[[u]]
-! CHECK: store i32 %[[r]], ptr %{{[0-9]}}, align 4
+
+! LLVMIR: %[[x:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[i8:.*]] = load i64, ptr %{{[0-9]}}, align 8
+! LLVMIR: %[[u:.*]] = urem i64 %[[i8]], 4
+! LLVMIR: %[[r:.*]] = extractelement <4 x i32> %[[x]], i64 %[[u]]
+! LLVMIR: store i32 %[[r]], ptr %{{[0-9]}}, align 4
 end subroutine vec_extract_testi32
 
 ! CHECK-LABEL: vec_extract_testi64
@@ -500,90 +218,34 @@ subroutine vec_extract_testi64(x, i1, i2, i4, i8)
   integer(4) :: i4
   integer(8) :: i8
   r = vec_extract(x, i1)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[i1:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i8>
-! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! CHECK-FIR: %[[c:.*]] = arith.constant 2 : i8
-! CHECK-FIR: %[[u:.*]] = llvm.urem %[[i1]], %[[c]]  : i8
-! CHECK-FIR: %[[r:.*]] = vector.extractelement %[[vr]][%[[u]] : i8] : vector<2xi64>
-! CHECK-FIR: fir.store %[[r]] to %{{[0-9]}} : !fir.ref<i64>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[i1:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(2 : i8) : i8
-! CHECK-LLVMIR: %[[u:.*]] = llvm.urem %[[i1]], %[[c]]  : i8
-! CHECK-LLVMIR: %[[r:.*]] = llvm.extractelement %[[x]][%[[u]] : i8] : vector<2xi64>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr<i64>
-
-! CHECK: %[[x:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[i1:.*]] = load i8, ptr %{{[0-9]}}, align 1
-! CHECK: %[[u:.*]] = urem i8 %[[i1]], 2
-! CHECK: %[[r:.*]] = extractelement <2 x i64> %[[x]], i8 %[[u]]
-! CHECK: store i64 %[[r]], ptr %{{[0-9]}}, align 8
+
+! LLVMIR: %[[x:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[i1:.*]] = load i8, ptr %{{[0-9]}}, align 1
+! LLVMIR: %[[u:.*]] = urem i8 %[[i1]], 2
+! LLVMIR: %[[r:.*]] = extractelement <2 x i64> %[[x]], i8 %[[u]]
+! LLVMIR: store i64 %[[r]], ptr %{{[0-9]}}, align 8
 
   r = vec_extract(x, i2)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[i2:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i16>
-! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! CHECK-FIR: %[[c:.*]] = arith.constant 2 : i16
-! CHECK-FIR: %[[u:.*]] = llvm.urem %[[i2]], %[[c]]  : i16
-! CHECK-FIR: %[[r:.*]] = vector.extractelement %[[vr]][%[[u]] : i16] : vector<2xi64>
-! CHECK-FIR: fir.store %[[r]] to %{{[0-9]}} : !fir.ref<i64>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[i2:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i16>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(2 : i16) : i16
-! CHECK-LLVMIR: %[[u:.*]] = llvm.urem %[[i2]], %[[c]]  : i16
-! CHECK-LLVMIR: %[[r:.*]] = llvm.extractelement %[[x]][%[[u]] : i16] : vector<2xi64>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr<i64>
-
-! CHECK: %[[x:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[i2:.*]] = load i16, ptr %{{[0-9]}}, align 2
-! CHECK: %[[u:.*]] = urem i16 %[[i2]], 2
-! CHECK: %[[r:.*]] = extractelement <2 x i64> %[[x]], i16 %[[u]]
-! CHECK: store i64 %[[r]], ptr %{{[0-9]}}, align 8
+
+! LLVMIR: %[[x:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[i2:.*]] = load i16, ptr %{{[0-9]}}, align 2
+! LLVMIR: %[[u:.*]] = urem i16 %[[i2]], 2
+! LLVMIR: %[[r:.*]] = extractelement <2 x i64> %[[x]], i16 %[[u]]
+! LLVMIR: store i64 %[[r]], ptr %{{[0-9]}}, align 8
 
   r = vec_extract(x, i4)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[i4:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i32>
-! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! CHECK-FIR: %[[c:.*]] = arith.constant 2 : i32
-! CHECK-FIR: %[[u:.*]] = llvm.urem %[[i4]], %[[c]]  : i32
-! CHECK-FIR: %[[r:.*]] = vector.extractelement %[[vr]][%[[u]] : i32] : vector<2xi64>
-! CHECK-FIR: fir.store %[[r]] to %{{[0-9]}} : !fir.ref<i64>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[i4:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i32>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(2 : i32) : i32
-! CHECK-LLVMIR: %[[u:.*]] = llvm.urem %[[i4]], %[[c]]  : i32
-! CHECK-LLVMIR: %[[r:.*]] = llvm.extractelement %[[x]][%[[u]] : i32] : vector<2xi64>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr<i64>
-
-! CHECK: %[[x:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[i4:.*]] = load i32, ptr %{{[0-9]}}, align 4
-! CHECK: %[[u:.*]] = urem i32 %[[i4]], 2
-! CHECK: %[[r:.*]] = extractelement <2 x i64> %[[x]], i32 %[[u]]
-! CHECK: store i64 %[[r]], ptr %{{[0-9]}}, align 8
+
+! LLVMIR: %[[x:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[i4:.*]] = load i32, ptr %{{[0-9]}}, align 4
+! LLVMIR: %[[u:.*]] = urem i32 %[[i4]], 2
+! LLVMIR: %[[r:.*]] = extractelement <2 x i64> %[[x]], i32 %[[u]]
+! LLVMIR: store i64 %[[r]], ptr %{{[0-9]}}, align 8
 
   r = vec_extract(x, i8)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[i8:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i64>
-! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! CHECK-FIR: %[[c:.*]] = arith.constant 2 : i64
-! CHECK-FIR: %[[u:.*]] = llvm.urem %[[i8]], %[[c]]  : i64
-! CHECK-FIR: %[[r:.*]] = vector.extractelement %[[vr]][%[[u]] : i64] : vector<2xi64>
-! CHECK-FIR: fir.store %[[r]] to %{{[0-9]}} : !fir.ref<i64>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[i8:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i64>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(2 : i64) : i64
-! CHECK-LLVMIR: %[[u:.*]] = llvm.urem %[[i8]], %[[c]]  : i64
-! CHECK-LLVMIR: %[[r:.*]] = llvm.extractelement %[[x]][%[[u]] : i64] : vector<2xi64>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr<i64>
-
-! CHECK: %[[x:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[i8:.*]] = load i64, ptr %{{[0-9]}}, align 8
-! CHECK: %[[u:.*]] = urem i64 %[[i8]], 2
-! CHECK: %[[r:.*]] = extractelement <2 x i64> %[[x]], i64 %[[u]]
-! CHECK: store i64 %[[r]], ptr %{{[0-9]}}, align 8
+
+! LLVMIR: %[[x:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[i8:.*]] = load i64, ptr %{{[0-9]}}, align 8
+! LLVMIR: %[[u:.*]] = urem i64 %[[i8]], 2
+! LLVMIR: %[[r:.*]] = extractelement <2 x i64> %[[x]], i64 %[[u]]
+! LLVMIR: store i64 %[[r]], ptr %{{[0-9]}}, align 8
 end subroutine vec_extract_testi64
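
With the native element order there is no mirroring: every variant above, whatever the integer kind of the index, reduces to a single urem before the extractelement, so the effective position is simply the index taken modulo the vector length. A minimal sketch of that arithmetic (values are arbitrary):

    program wrap_sketch
      implicit none
      integer :: pos
      ! The vector length is 4 for vector(real(4)) and vector(integer(4)),
      ! 2 for the 64-bit element types, 8 and 16 for narrower integers.
      pos = mod(7, 4)   ! the single urem the LLVMIR checks verify
      print *, pos      ! 3
    end program wrap_sketch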

diff --git a/flang/test/Lower/PowerPC/ppc-vec-insert-elem-order.f90 b/flang/test/Lower/PowerPC/ppc-vec-insert-elem-order.f90
index c607dde3c9f1402..f64df46f170ab81 100644
--- a/flang/test/Lower/PowerPC/ppc-vec-insert-elem-order.f90
+++ b/flang/test/Lower/PowerPC/ppc-vec-insert-elem-order.f90
@@ -1,5 +1,4 @@
-! RUN: %flang_fc1 -emit-fir %s -fno-ppc-native-vector-element-order -triple ppc64le-unknown-linux -o - | FileCheck --check-prefixes="FIR" %s
-! RUN: %flang_fc1 -emit-llvm %s -fno-ppc-native-vector-element-order -triple ppc64le-unknown-linux -o - | FileCheck --check-prefixes="LLVMIR" %s
+! RUN: %flang_fc1 -flang-experimental-hlfir -emit-llvm %s -fno-ppc-native-vector-element-order -triple ppc64le-unknown-linux -o - | FileCheck --check-prefixes="LLVMIR" %s
 ! REQUIRES: target=powerpc{{.*}}
 
 !CHECK-LABEL: vec_insert_testf32i64
@@ -10,18 +9,6 @@ subroutine vec_insert_testf32i64(v, x, i8)
   integer(8) :: i8
   r = vec_insert(v, x, i8)
 
-! FIR: %[[v:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<f32>
-! FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<!fir.vector<4:f32>>
-! FIR: %[[i8:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i64>
-! FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! FIR: %[[c:.*]] = arith.constant 4 : i64
-! FIR: %[[urem:.*]] = llvm.urem %[[i8]], %[[c]] : i64
-! FIR: %[[c3:.*]] = arith.constant 3 : i64
-! FIR: %[[sub:.*]] = llvm.sub %[[c3]], %[[urem]] : i64
-! FIR: %[[r:.*]] = vector.insertelement %[[v]], %[[vr]][%[[sub]] : i64] : vector<4xf32>
-! FIR: %[[r_conv:.*]] = fir.convert %[[r]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! FIR: fir.store %[[r_conv]] to %{{[0-9]}} : !fir.ref<!fir.vector<4:f32>>
-
 ! LLVMIR: %[[v:.*]] = load float, ptr %{{[0-9]}}, align 4
 ! LLVMIR: %[[x:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
 ! LLVMIR: %[[i8:.*]] = load i64, ptr %{{[0-9]}}, align 8
@@ -39,18 +26,6 @@ subroutine vec_insert_testi64i8(v, x, i1, i2, i4, i8)
   integer(1) :: i1
   r = vec_insert(v, x, i1)
 
-! FIR: %[[v:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i64>
-! FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<!fir.vector<2:i64>>
-! FIR: %[[i1:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i8>
-! FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! FIR: %[[c:.*]] = arith.constant 2 : i8
-! FIR: %[[urem:.*]] = llvm.urem %[[i1]], %[[c]] : i8
-! FIR: %[[c1:.*]] = arith.constant 1 : i8
-! FIR: %[[sub:.*]] = llvm.sub %[[c1]], %[[urem]] : i8
-! FIR: %[[r:.*]] = vector.insertelement %[[v]], %[[vr]][%[[sub]] : i8] : vector<2xi64>
-! FIR: %[[r_conv:.*]] = fir.convert %[[r]] : (vector<2xi64>) -> !fir.vector<2:i64>
-! FIR: fir.store %[[r_conv]] to %{{[0-9]}} : !fir.ref<!fir.vector<2:i64>>
-
 ! LLVMIR: %[[v:.*]] = load i64, ptr %{{[0-9]}}, align 8
 ! LLVMIR: %[[x:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
 ! LLVMIR: %[[i1:.*]] = load i8, ptr %{{[0-9]}}, align 1
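
vec_insert follows the same pattern: the deleted FIR checks show the urem/sub pair computing (n-1) - mod(i, n) before the insertelement, after which the whole updated vector is stored back. In plain-array terms the non-native-order insert behaves roughly like this sketch (hypothetical, with n = 4):

    program insert_sketch
      implicit none
      real :: x(0:3), v
      integer(8) :: i8
      x = [0.0, 1.0, 2.0, 3.0]; v = 9.0; i8 = 5_8
      ! insertelement at (n-1) - mod(i, n), per the deleted FIR checks
      x(3 - mod(i8, 4_8)) = v
      print *, x   ! 0.0 1.0 9.0 3.0
    end program insert_sketch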

diff --git a/flang/test/Lower/PowerPC/ppc-vec-insert.f90 b/flang/test/Lower/PowerPC/ppc-vec-insert.f90
index dac186771e67a12..3648be6ac027e30 100644
--- a/flang/test/Lower/PowerPC/ppc-vec-insert.f90
+++ b/flang/test/Lower/PowerPC/ppc-vec-insert.f90
@@ -1,6 +1,4 @@
-! RUN: bbc -emit-fir %s -o - | FileCheck --check-prefixes="CHECK-FIR" %s
-! RUN: %flang_fc1 -emit-fir %s -o - | fir-opt --fir-to-llvm-ir | FileCheck --check-prefixes="CHECK-LLVMIR" %s
-! RUN: %flang_fc1 -emit-llvm %s -o - | FileCheck --check-prefixes="CHECK" %s
+! RUN: %flang_fc1 -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes="LLVMIR" %s
 ! REQUIRES: target=powerpc{{.*}}
 
 ! vec_insert
@@ -15,109 +13,40 @@ subroutine vec_insert_testf32(v, x, i1, i2, i4, i8)
   integer(4) :: i4
   integer(8) :: i8
   r = vec_insert(v, x, i1)
-! CHECK-FIR: %[[v:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<f32>
-! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[i1:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i8>
-! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[c:.*]] = arith.constant 4 : i8
-! CHECK-FIR: %[[urem:.*]] = llvm.urem %[[i1]], %[[c]] : i8
-! CHECK-FIR: %[[r:.*]] = vector.insertelement %[[v]], %[[vr]][%[[urem]] : i8] : vector<4xf32>
-! CHECK-FIR: %[[r_conv:.*]] = fir.convert %[[r]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! CHECK-FIR: fir.store %[[r_conv]] to %{{[0-9]}} : !fir.ref<!fir.vector<4:f32>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<f32>
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[i1:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(4 : i8) : i8
-! CHECK-LLVMIR: %[[urem:.*]] = llvm.urem %[[i1]], %[[c]] : i8
-! CHECK-LLVMIR: %[[r:.*]] = llvm.insertelement %[[v]], %[[x]][%[[urem]] : i8] : vector<4xf32>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr<vector<4xf32>>
-
-! CHECK: %[[v:.*]] = load float, ptr %{{[0-9]}}, align 4
-! CHECK: %[[x:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[i1:.*]] = load i8, ptr %{{[0-9]}}, align 1
-! CHECK: %[[urem:.*]] = urem i8 %[[i1]], 4
-! CHECK: %[[r:.*]] = insertelement <4 x float> %[[x]], float %[[v]], i8 %[[urem]]
-! CHECK: store <4 x float> %[[r]], ptr %{{[0-9]}}, align 16
 
+! LLVMIR: %[[v:.*]] = load float, ptr %{{[0-9]}}, align 4
+! LLVMIR: %[[x:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[i1:.*]] = load i8, ptr %{{[0-9]}}, align 1
+! LLVMIR: %[[urem:.*]] = urem i8 %[[i1]], 4
+! LLVMIR: %[[r:.*]] = insertelement <4 x float> %[[x]], float %[[v]], i8 %[[urem]]
+! LLVMIR: store <4 x float> %[[r]], ptr %{{[0-9]}}, align 16
 
   r = vec_insert(v, x, i2)
-! CHECK-FIR: %[[v:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<f32>
-! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[i2:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i16>
-! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[c:.*]] = arith.constant 4 : i16
-! CHECK-FIR: %[[urem:.*]] = llvm.urem %[[i2]], %[[c]] : i16
-! CHECK-FIR: %[[r:.*]] = vector.insertelement %[[v]], %[[vr]][%[[urem]] : i16] : vector<4xf32>
-! CHECK-FIR: %[[r_conv:.*]] = fir.convert %[[r]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! CHECK-FIR: fir.store %[[r_conv]] to %{{[0-9]}} : !fir.ref<!fir.vector<4:f32>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<f32>
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[i2:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i16>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(4 : i16) : i16
-! CHECK-LLVMIR: %[[urem:.*]] = llvm.urem %[[i2]], %[[c]] : i16
-! CHECK-LLVMIR: %[[r:.*]] = llvm.insertelement %[[v]], %[[x]][%[[urem]] : i16] : vector<4xf32>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr<vector<4xf32>>
-
-! CHECK: %[[v:.*]] = load float, ptr %{{[0-9]}}, align 4
-! CHECK: %[[x:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[i2:.*]] = load i16, ptr %{{[0-9]}}, align 2
-! CHECK: %[[urem:.*]] = urem i16 %[[i2]], 4
-! CHECK: %[[r:.*]] = insertelement <4 x float> %[[x]], float %[[v]], i16 %[[urem]]
-! CHECK: store <4 x float> %[[r]], ptr %{{[0-9]}}, align 16
+
+! LLVMIR: %[[v:.*]] = load float, ptr %{{[0-9]}}, align 4
+! LLVMIR: %[[x:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[i2:.*]] = load i16, ptr %{{[0-9]}}, align 2
+! LLVMIR: %[[urem:.*]] = urem i16 %[[i2]], 4
+! LLVMIR: %[[r:.*]] = insertelement <4 x float> %[[x]], float %[[v]], i16 %[[urem]]
+! LLVMIR: store <4 x float> %[[r]], ptr %{{[0-9]}}, align 16
 
   r = vec_insert(v, x, i4)
-! CHECK-FIR: %[[v:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<f32>
-! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[i4:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i32>
-! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[c:.*]] = arith.constant 4 : i32
-! CHECK-FIR: %[[urem:.*]] = llvm.urem %[[i4]], %[[c]] : i32
-! CHECK-FIR: %[[r:.*]] = vector.insertelement %[[v]], %[[vr]][%[[urem]] : i32] : vector<4xf32>
-! CHECK-FIR: %[[r_conv:.*]] = fir.convert %[[r]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! CHECK-FIR: fir.store %[[r_conv]] to %{{[0-9]}} : !fir.ref<!fir.vector<4:f32>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<f32>
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[i4:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i32>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(4 : i32) : i32
-! CHECK-LLVMIR: %[[urem:.*]] = llvm.urem %[[i4]], %[[c]] : i32
-! CHECK-LLVMIR: %[[r:.*]] = llvm.insertelement %[[v]], %[[x]][%[[urem]] : i32] : vector<4xf32>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr<vector<4xf32>>
-
-! CHECK: %[[v:.*]] = load float, ptr %{{[0-9]}}, align 4
-! CHECK: %[[x:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[i4:.*]] = load i32, ptr %{{[0-9]}}, align 4
-! CHECK: %[[urem:.*]] = urem i32 %[[i4]], 4
-! CHECK: %[[r:.*]] = insertelement <4 x float> %[[x]], float %[[v]], i32 %[[urem]]
-! CHECK: store <4 x float> %[[r]], ptr %{{[0-9]}}, align 16
+
+! LLVMIR: %[[v:.*]] = load float, ptr %{{[0-9]}}, align 4
+! LLVMIR: %[[x:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[i4:.*]] = load i32, ptr %{{[0-9]}}, align 4
+! LLVMIR: %[[urem:.*]] = urem i32 %[[i4]], 4
+! LLVMIR: %[[r:.*]] = insertelement <4 x float> %[[x]], float %[[v]], i32 %[[urem]]
+! LLVMIR: store <4 x float> %[[r]], ptr %{{[0-9]}}, align 16
 
   r = vec_insert(v, x, i8)
-! CHECK-FIR: %[[v:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<f32>
-! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[i8:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i64>
-! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[c:.*]] = arith.constant 4 : i64
-! CHECK-FIR: %[[urem:.*]] = llvm.urem %[[i8]], %[[c]] : i64
-! CHECK-FIR: %[[r:.*]] = vector.insertelement %[[v]], %[[vr]][%[[urem]] : i64] : vector<4xf32>
-! CHECK-FIR: %[[r_conv:.*]] = fir.convert %[[r]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! CHECK-FIR: fir.store %[[r_conv]] to %{{[0-9]}} : !fir.ref<!fir.vector<4:f32>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<f32>
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[i8:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i64>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(4 : i64) : i64
-! CHECK-LLVMIR: %[[urem:.*]] = llvm.urem %[[i8]], %[[c]] : i64
-! CHECK-LLVMIR: %[[r:.*]] = llvm.insertelement %[[v]], %[[x]][%[[urem]] : i64] : vector<4xf32>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr<vector<4xf32>>
-
-! CHECK: %[[v:.*]] = load float, ptr %{{[0-9]}}, align 4
-! CHECK: %[[x:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[i8:.*]] = load i64, ptr %{{[0-9]}}, align 8
-! CHECK: %[[urem:.*]] = urem i64 %[[i8]], 4
-! CHECK: %[[r:.*]] = insertelement <4 x float> %[[x]], float %[[v]], i64 %[[urem]]
-! CHECK: store <4 x float> %[[r]], ptr %{{[0-9]}}, align 16
+
+! LLVMIR: %[[v:.*]] = load float, ptr %{{[0-9]}}, align 4
+! LLVMIR: %[[x:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[i8:.*]] = load i64, ptr %{{[0-9]}}, align 8
+! LLVMIR: %[[urem:.*]] = urem i64 %[[i8]], 4
+! LLVMIR: %[[r:.*]] = insertelement <4 x float> %[[x]], float %[[v]], i64 %[[urem]]
+! LLVMIR: store <4 x float> %[[r]], ptr %{{[0-9]}}, align 16
 end subroutine vec_insert_testf32
 
 !CHECK-LABEL: vec_insert_testf64
@@ -130,109 +59,40 @@ subroutine vec_insert_testf64(v, x, i1, i2, i4, i8)
   integer(4) :: i4
   integer(8) :: i8
   r = vec_insert(v, x, i1)
-! CHECK-FIR: %[[v:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<f64>
-! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[i1:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i8>
-! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! CHECK-FIR: %[[c:.*]] = arith.constant 2 : i8
-! CHECK-FIR: %[[urem:.*]] = llvm.urem %[[i1]], %[[c]] : i8
-! CHECK-FIR: %[[r:.*]] = vector.insertelement %[[v]], %[[vr]][%[[urem]] : i8] : vector<2xf64>
-! CHECK-FIR: %[[r_conv:.*]] = fir.convert %[[r]] : (vector<2xf64>) -> !fir.vector<2:f64>
-! CHECK-FIR: fir.store %[[r_conv]] to %{{[0-9]}} : !fir.ref<!fir.vector<2:f64>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<f64>
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %[[i1:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(2 : i8) : i8
-! CHECK-LLVMIR: %[[urem:.*]] = llvm.urem %[[i1]], %[[c]] : i8
-! CHECK-LLVMIR: %[[r:.*]] = llvm.insertelement %[[v]], %[[x]][%[[urem]] : i8] : vector<2xf64>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr<vector<2xf64>>
-
-! CHECK: %[[v:.*]] = load double, ptr %{{[0-9]}}, align 8
-! CHECK: %[[x:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[i1:.*]] = load i8, ptr %{{[0-9]}}, align 1
-! CHECK: %[[urem:.*]] = urem i8 %[[i1]], 2
-! CHECK: %[[r:.*]] = insertelement <2 x double> %[[x]], double %[[v]], i8 %[[urem]]
-! CHECK: store <2 x double> %[[r]], ptr %{{[0-9]}}, align 16
 
+! LLVMIR: %[[v:.*]] = load double, ptr %{{[0-9]}}, align 8
+! LLVMIR: %[[x:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[i1:.*]] = load i8, ptr %{{[0-9]}}, align 1
+! LLVMIR: %[[urem:.*]] = urem i8 %[[i1]], 2
+! LLVMIR: %[[r:.*]] = insertelement <2 x double> %[[x]], double %[[v]], i8 %[[urem]]
+! LLVMIR: store <2 x double> %[[r]], ptr %{{[0-9]}}, align 16
 
   r = vec_insert(v, x, i2)
-! CHECK-FIR: %[[v:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<f64>
-! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[i2:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i16>
-! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! CHECK-FIR: %[[c:.*]] = arith.constant 2 : i16
-! CHECK-FIR: %[[urem:.*]] = llvm.urem %[[i2]], %[[c]] : i16
-! CHECK-FIR: %[[r:.*]] = vector.insertelement %[[v]], %[[vr]][%[[urem]] : i16] : vector<2xf64>
-! CHECK-FIR: %[[r_conv:.*]] = fir.convert %[[r]] : (vector<2xf64>) -> !fir.vector<2:f64>
-! CHECK-FIR: fir.store %[[r_conv]] to %{{[0-9]}} : !fir.ref<!fir.vector<2:f64>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<f64>
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %[[i2:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i16>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(2 : i16) : i16
-! CHECK-LLVMIR: %[[urem:.*]] = llvm.urem %[[i2]], %[[c]] : i16
-! CHECK-LLVMIR: %[[r:.*]] = llvm.insertelement %[[v]], %[[x]][%[[urem]] : i16] : vector<2xf64>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr<vector<2xf64>>
-
-! CHECK: %[[v:.*]] = load double, ptr %{{[0-9]}}, align 8
-! CHECK: %[[x:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[i2:.*]] = load i16, ptr %{{[0-9]}}, align 2
-! CHECK: %[[urem:.*]] = urem i16 %[[i2]], 2
-! CHECK: %[[r:.*]] = insertelement <2 x double> %[[x]], double %[[v]], i16 %[[urem]]
-! CHECK: store <2 x double> %[[r]], ptr %{{[0-9]}}, align 16
+
+! LLVMIR: %[[v:.*]] = load double, ptr %{{[0-9]}}, align 8
+! LLVMIR: %[[x:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[i2:.*]] = load i16, ptr %{{[0-9]}}, align 2
+! LLVMIR: %[[urem:.*]] = urem i16 %[[i2]], 2
+! LLVMIR: %[[r:.*]] = insertelement <2 x double> %[[x]], double %[[v]], i16 %[[urem]]
+! LLVMIR: store <2 x double> %[[r]], ptr %{{[0-9]}}, align 16
 
   r = vec_insert(v, x, i4)
-! CHECK-FIR: %[[v:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<f64>
-! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[i4:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i32>
-! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! CHECK-FIR: %[[c:.*]] = arith.constant 2 : i32
-! CHECK-FIR: %[[urem:.*]] = llvm.urem %[[i4]], %[[c]] : i32
-! CHECK-FIR: %[[r:.*]] = vector.insertelement %[[v]], %[[vr]][%[[urem]] : i32] : vector<2xf64>
-! CHECK-FIR: %[[r_conv:.*]] = fir.convert %[[r]] : (vector<2xf64>) -> !fir.vector<2:f64>
-! CHECK-FIR: fir.store %[[r_conv]] to %{{[0-9]}} : !fir.ref<!fir.vector<2:f64>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<f64>
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %[[i4:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i32>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(2 : i32) : i32
-! CHECK-LLVMIR: %[[urem:.*]] = llvm.urem %[[i4]], %[[c]] : i32
-! CHECK-LLVMIR: %[[r:.*]] = llvm.insertelement %[[v]], %[[x]][%[[urem]] : i32] : vector<2xf64>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr<vector<2xf64>>
-
-! CHECK: %[[v:.*]] = load double, ptr %{{[0-9]}}, align 8
-! CHECK: %[[x:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[i4:.*]] = load i32, ptr %{{[0-9]}}, align 4
-! CHECK: %[[urem:.*]] = urem i32 %[[i4]], 2
-! CHECK: %[[r:.*]] = insertelement <2 x double> %[[x]], double %[[v]], i32 %[[urem]]
-! CHECK: store <2 x double> %[[r]], ptr %{{[0-9]}}, align 16
+
+! LLVMIR: %[[v:.*]] = load double, ptr %{{[0-9]}}, align 8
+! LLVMIR: %[[x:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[i4:.*]] = load i32, ptr %{{[0-9]}}, align 4
+! LLVMIR: %[[urem:.*]] = urem i32 %[[i4]], 2
+! LLVMIR: %[[r:.*]] = insertelement <2 x double> %[[x]], double %[[v]], i32 %[[urem]]
+! LLVMIR: store <2 x double> %[[r]], ptr %{{[0-9]}}, align 16
 
   r = vec_insert(v, x, i8)
-! CHECK-FIR: %[[v:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<f64>
-! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[i8:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i64>
-! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! CHECK-FIR: %[[c:.*]] = arith.constant 2 : i64
-! CHECK-FIR: %[[urem:.*]] = llvm.urem %[[i8]], %[[c]] : i64
-! CHECK-FIR: %[[r:.*]] = vector.insertelement %[[v]], %[[vr]][%[[urem]] : i64] : vector<2xf64>
-! CHECK-FIR: %[[r_conv:.*]] = fir.convert %[[r]] : (vector<2xf64>) -> !fir.vector<2:f64>
-! CHECK-FIR: fir.store %[[r_conv]] to %{{[0-9]}} : !fir.ref<!fir.vector<2:f64>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<f64>
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %[[i8:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i64>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(2 : i64) : i64
-! CHECK-LLVMIR: %[[urem:.*]] = llvm.urem %[[i8]], %[[c]] : i64
-! CHECK-LLVMIR: %[[r:.*]] = llvm.insertelement %[[v]], %[[x]][%[[urem]] : i64] : vector<2xf64>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr<vector<2xf64>>
-
-! CHECK: %[[v:.*]] = load double, ptr %{{[0-9]}}, align 8
-! CHECK: %[[x:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[i8:.*]] = load i64, ptr %{{[0-9]}}, align 8
-! CHECK: %[[urem:.*]] = urem i64 %[[i8]], 2
-! CHECK: %[[r:.*]] = insertelement <2 x double> %[[x]], double %[[v]], i64 %[[urem]]
-! CHECK: store <2 x double> %[[r]], ptr %{{[0-9]}}, align 16
+
+! LLVMIR: %[[v:.*]] = load double, ptr %{{[0-9]}}, align 8
+! LLVMIR: %[[x:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[i8:.*]] = load i64, ptr %{{[0-9]}}, align 8
+! LLVMIR: %[[urem:.*]] = urem i64 %[[i8]], 2
+! LLVMIR: %[[r:.*]] = insertelement <2 x double> %[[x]], double %[[v]], i64 %[[urem]]
+! LLVMIR: store <2 x double> %[[r]], ptr %{{[0-9]}}, align 16
 end subroutine vec_insert_testf64
 
 !CHECK-LABEL: vec_insert_testi8
@@ -245,109 +105,40 @@ subroutine vec_insert_testi8(v, x, i1, i2, i4, i8)
   integer(4) :: i4
   integer(8) :: i8
   r = vec_insert(v, x, i1)
-! CHECK-FIR: %[[v:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i8>
-! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[i1:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i8>
-! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[c:.*]] = arith.constant 16 : i8
-! CHECK-FIR: %[[urem:.*]] = llvm.urem %[[i1]], %[[c]] : i8
-! CHECK-FIR: %[[r:.*]] = vector.insertelement %[[v]], %[[vr]][%[[urem]] : i8] : vector<16xi8>
-! CHECK-FIR: %[[r_conv:.*]] = fir.convert %[[r]] : (vector<16xi8>) -> !fir.vector<16:i8>
-! CHECK-FIR: fir.store %[[r_conv]] to %{{[0-9]}} : !fir.ref<!fir.vector<16:i8>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[i1:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(16 : i8) : i8
-! CHECK-LLVMIR: %[[urem:.*]] = llvm.urem %[[i1]], %[[c]] : i8
-! CHECK-LLVMIR: %[[r:.*]] = llvm.insertelement %[[v]], %[[x]][%[[urem]] : i8] : vector<16xi8>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr<vector<16xi8>>
-
-! CHECK: %[[v:.*]] = load i8, ptr %{{[0-9]}}, align 1
-! CHECK: %[[x:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[i1:.*]] = load i8, ptr %{{[0-9]}}, align 1
-! CHECK: %[[urem:.*]] = urem i8 %[[i1]], 16
-! CHECK: %[[r:.*]] = insertelement <16 x i8> %[[x]], i8 %[[v]], i8 %[[urem]]
-! CHECK: store <16 x i8> %[[r]], ptr %{{[0-9]}}, align 16
 
+! LLVMIR: %[[v:.*]] = load i8, ptr %{{[0-9]}}, align 1
+! LLVMIR: %[[x:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[i1:.*]] = load i8, ptr %{{[0-9]}}, align 1
+! LLVMIR: %[[urem:.*]] = urem i8 %[[i1]], 16
+! LLVMIR: %[[r:.*]] = insertelement <16 x i8> %[[x]], i8 %[[v]], i8 %[[urem]]
+! LLVMIR: store <16 x i8> %[[r]], ptr %{{[0-9]}}, align 16
 
   r = vec_insert(v, x, i2)
-! CHECK-FIR: %[[v:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i8>
-! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[i2:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i16>
-! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[c:.*]] = arith.constant 16 : i16
-! CHECK-FIR: %[[urem:.*]] = llvm.urem %[[i2]], %[[c]] : i16
-! CHECK-FIR: %[[r:.*]] = vector.insertelement %[[v]], %[[vr]][%[[urem]] : i16] : vector<16xi8>
-! CHECK-FIR: %[[r_conv:.*]] = fir.convert %[[r]] : (vector<16xi8>) -> !fir.vector<16:i8>
-! CHECK-FIR: fir.store %[[r_conv]] to %{{[0-9]}} : !fir.ref<!fir.vector<16:i8>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[i2:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i16>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(16 : i16) : i16
-! CHECK-LLVMIR: %[[urem:.*]] = llvm.urem %[[i2]], %[[c]] : i16
-! CHECK-LLVMIR: %[[r:.*]] = llvm.insertelement %[[v]], %[[x]][%[[urem]] : i16] : vector<16xi8>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr<vector<16xi8>>
-
-! CHECK: %[[v:.*]] = load i8, ptr %{{[0-9]}}, align 1
-! CHECK: %[[x:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[i2:.*]] = load i16, ptr %{{[0-9]}}, align 2
-! CHECK: %[[urem:.*]] = urem i16 %[[i2]], 16
-! CHECK: %[[r:.*]] = insertelement <16 x i8> %[[x]], i8 %[[v]], i16 %[[urem]]
-! CHECK: store <16 x i8> %[[r]], ptr %{{[0-9]}}, align 16
+
+! LLVMIR: %[[v:.*]] = load i8, ptr %{{[0-9]}}, align 1
+! LLVMIR: %[[x:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[i2:.*]] = load i16, ptr %{{[0-9]}}, align 2
+! LLVMIR: %[[urem:.*]] = urem i16 %[[i2]], 16
+! LLVMIR: %[[r:.*]] = insertelement <16 x i8> %[[x]], i8 %[[v]], i16 %[[urem]]
+! LLVMIR: store <16 x i8> %[[r]], ptr %{{[0-9]}}, align 16
 
   r = vec_insert(v, x, i4)
-! CHECK-FIR: %[[v:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i8>
-! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[i4:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i32>
-! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[c:.*]] = arith.constant 16 : i32
-! CHECK-FIR: %[[urem:.*]] = llvm.urem %[[i4]], %[[c]] : i32
-! CHECK-FIR: %[[r:.*]] = vector.insertelement %[[v]], %[[vr]][%[[urem]] : i32] : vector<16xi8>
-! CHECK-FIR: %[[r_conv:.*]] = fir.convert %[[r]] : (vector<16xi8>) -> !fir.vector<16:i8>
-! CHECK-FIR: fir.store %[[r_conv]] to %{{[0-9]}} : !fir.ref<!fir.vector<16:i8>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[i4:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i32>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(16 : i32) : i32
-! CHECK-LLVMIR: %[[urem:.*]] = llvm.urem %[[i4]], %[[c]] : i32
-! CHECK-LLVMIR: %[[r:.*]] = llvm.insertelement %[[v]], %[[x]][%[[urem]] : i32] : vector<16xi8>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr<vector<16xi8>>
-
-! CHECK: %[[v:.*]] = load i8, ptr %{{[0-9]}}, align 1
-! CHECK: %[[x:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[i4:.*]] = load i32, ptr %{{[0-9]}}, align 4
-! CHECK: %[[urem:.*]] = urem i32 %[[i4]], 16
-! CHECK: %[[r:.*]] = insertelement <16 x i8> %[[x]], i8 %[[v]], i32 %[[urem]]
-! CHECK: store <16 x i8> %[[r]], ptr %{{[0-9]}}, align 16
+
+! LLVMIR: %[[v:.*]] = load i8, ptr %{{[0-9]}}, align 1
+! LLVMIR: %[[x:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[i4:.*]] = load i32, ptr %{{[0-9]}}, align 4
+! LLVMIR: %[[urem:.*]] = urem i32 %[[i4]], 16
+! LLVMIR: %[[r:.*]] = insertelement <16 x i8> %[[x]], i8 %[[v]], i32 %[[urem]]
+! LLVMIR: store <16 x i8> %[[r]], ptr %{{[0-9]}}, align 16
 
   r = vec_insert(v, x, i8)
-! CHECK-FIR: %[[v:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i8>
-! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[i8:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i64>
-! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[c:.*]] = arith.constant 16 : i64
-! CHECK-FIR: %[[urem:.*]] = llvm.urem %[[i8]], %[[c]] : i64
-! CHECK-FIR: %[[r:.*]] = vector.insertelement %[[v]], %[[vr]][%[[urem]] : i64] : vector<16xi8>
-! CHECK-FIR: %[[r_conv:.*]] = fir.convert %[[r]] : (vector<16xi8>) -> !fir.vector<16:i8>
-! CHECK-FIR: fir.store %[[r_conv]] to %{{[0-9]}} : !fir.ref<!fir.vector<16:i8>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[i8:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i64>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(16 : i64) : i64
-! CHECK-LLVMIR: %[[urem:.*]] = llvm.urem %[[i8]], %[[c]] : i64
-! CHECK-LLVMIR: %[[r:.*]] = llvm.insertelement %[[v]], %[[x]][%[[urem]] : i64] : vector<16xi8>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr<vector<16xi8>>
-
-! CHECK: %[[v:.*]] = load i8, ptr %{{[0-9]}}, align 1
-! CHECK: %[[x:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[i8:.*]] = load i64, ptr %{{[0-9]}}, align 8
-! CHECK: %[[urem:.*]] = urem i64 %[[i8]], 16
-! CHECK: %[[r:.*]] = insertelement <16 x i8> %[[x]], i8 %[[v]], i64 %[[urem]]
-! CHECK: store <16 x i8> %[[r]], ptr %{{[0-9]}}, align 16
+
+! LLVMIR: %[[v:.*]] = load i8, ptr %{{[0-9]}}, align 1
+! LLVMIR: %[[x:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[i8:.*]] = load i64, ptr %{{[0-9]}}, align 8
+! LLVMIR: %[[urem:.*]] = urem i64 %[[i8]], 16
+! LLVMIR: %[[r:.*]] = insertelement <16 x i8> %[[x]], i8 %[[v]], i64 %[[urem]]
+! LLVMIR: store <16 x i8> %[[r]], ptr %{{[0-9]}}, align 16
 end subroutine vec_insert_testi8
 
 !CHECK-LABEL: vec_insert_testi16
@@ -360,109 +151,40 @@ subroutine vec_insert_testi16(v, x, i1, i2, i4, i8)
   integer(4) :: i4
   integer(8) :: i8
   r = vec_insert(v, x, i1)
-! CHECK-FIR: %[[v:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i16>
-! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[i1:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i8>
-! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[c:.*]] = arith.constant 8 : i8
-! CHECK-FIR: %[[urem:.*]] = llvm.urem %[[i1]], %[[c]] : i8
-! CHECK-FIR: %[[r:.*]] = vector.insertelement %[[v]], %[[vr]][%[[urem]] : i8] : vector<8xi16>
-! CHECK-FIR: %[[r_conv:.*]] = fir.convert %[[r]] : (vector<8xi16>) -> !fir.vector<8:i16>
-! CHECK-FIR: fir.store %[[r_conv]] to %{{[0-9]}} : !fir.ref<!fir.vector<8:i16>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i16>
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[i1:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(8 : i8) : i8
-! CHECK-LLVMIR: %[[urem:.*]] = llvm.urem %[[i1]], %[[c]] : i8
-! CHECK-LLVMIR: %[[r:.*]] = llvm.insertelement %[[v]], %[[x]][%[[urem]] : i8] : vector<8xi16>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr<vector<8xi16>>
-
-! CHECK: %[[v:.*]] = load i16, ptr %{{[0-9]}}, align 2
-! CHECK: %[[x:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[i1:.*]] = load i8, ptr %{{[0-9]}}, align 1
-! CHECK: %[[urem:.*]] = urem i8 %[[i1]], 8
-! CHECK: %[[r:.*]] = insertelement <8 x i16> %[[x]], i16 %[[v]], i8 %[[urem]]
-! CHECK: store <8 x i16> %[[r]], ptr %{{[0-9]}}, align 16
 
+! LLVMIR: %[[v:.*]] = load i16, ptr %{{[0-9]}}, align 2
+! LLVMIR: %[[x:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[i1:.*]] = load i8, ptr %{{[0-9]}}, align 1
+! LLVMIR: %[[urem:.*]] = urem i8 %[[i1]], 8
+! LLVMIR: %[[r:.*]] = insertelement <8 x i16> %[[x]], i16 %[[v]], i8 %[[urem]]
+! LLVMIR: store <8 x i16> %[[r]], ptr %{{[0-9]}}, align 16
 
   r = vec_insert(v, x, i2)
-! CHECK-FIR: %[[v:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i16>
-! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[i2:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i16>
-! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[c:.*]] = arith.constant 8 : i16
-! CHECK-FIR: %[[urem:.*]] = llvm.urem %[[i2]], %[[c]] : i16
-! CHECK-FIR: %[[r:.*]] = vector.insertelement %[[v]], %[[vr]][%[[urem]] : i16] : vector<8xi16>
-! CHECK-FIR: %[[r_conv:.*]] = fir.convert %[[r]] : (vector<8xi16>) -> !fir.vector<8:i16>
-! CHECK-FIR: fir.store %[[r_conv]] to %{{[0-9]}} : !fir.ref<!fir.vector<8:i16>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i16>
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[i2:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i16>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(8 : i16) : i16
-! CHECK-LLVMIR: %[[urem:.*]] = llvm.urem %[[i2]], %[[c]] : i16
-! CHECK-LLVMIR: %[[r:.*]] = llvm.insertelement %[[v]], %[[x]][%[[urem]] : i16] : vector<8xi16>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr<vector<8xi16>>
-
-! CHECK: %[[v:.*]] = load i16, ptr %{{[0-9]}}, align 2
-! CHECK: %[[x:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[i2:.*]] = load i16, ptr %{{[0-9]}}, align 2
-! CHECK: %[[urem:.*]] = urem i16 %[[i2]], 8
-! CHECK: %[[r:.*]] = insertelement <8 x i16> %[[x]], i16 %[[v]], i16 %[[urem]]
-! CHECK: store <8 x i16> %[[r]], ptr %{{[0-9]}}, align 16
+
+! LLVMIR: %[[v:.*]] = load i16, ptr %{{[0-9]}}, align 2
+! LLVMIR: %[[x:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[i2:.*]] = load i16, ptr %{{[0-9]}}, align 2
+! LLVMIR: %[[urem:.*]] = urem i16 %[[i2]], 8
+! LLVMIR: %[[r:.*]] = insertelement <8 x i16> %[[x]], i16 %[[v]], i16 %[[urem]]
+! LLVMIR: store <8 x i16> %[[r]], ptr %{{[0-9]}}, align 16
 
   r = vec_insert(v, x, i4)
-! CHECK-FIR: %[[v:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i16>
-! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[i4:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i32>
-! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[c:.*]] = arith.constant 8 : i32
-! CHECK-FIR: %[[urem:.*]] = llvm.urem %[[i4]], %[[c]] : i32
-! CHECK-FIR: %[[r:.*]] = vector.insertelement %[[v]], %[[vr]][%[[urem]] : i32] : vector<8xi16>
-! CHECK-FIR: %[[r_conv:.*]] = fir.convert %[[r]] : (vector<8xi16>) -> !fir.vector<8:i16>
-! CHECK-FIR: fir.store %[[r_conv]] to %{{[0-9]}} : !fir.ref<!fir.vector<8:i16>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i16>
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[i4:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i32>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(8 : i32) : i32
-! CHECK-LLVMIR: %[[urem:.*]] = llvm.urem %[[i4]], %[[c]] : i32
-! CHECK-LLVMIR: %[[r:.*]] = llvm.insertelement %[[v]], %[[x]][%[[urem]] : i32] : vector<8xi16>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr<vector<8xi16>>
-
-! CHECK: %[[v:.*]] = load i16, ptr %{{[0-9]}}, align 2
-! CHECK: %[[x:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[i4:.*]] = load i32, ptr %{{[0-9]}}, align 4
-! CHECK: %[[urem:.*]] = urem i32 %[[i4]], 8
-! CHECK: %[[r:.*]] = insertelement <8 x i16> %[[x]], i16 %[[v]], i32 %[[urem]]
-! CHECK: store <8 x i16> %[[r]], ptr %{{[0-9]}}, align 16
+
+! LLVMIR: %[[v:.*]] = load i16, ptr %{{[0-9]}}, align 2
+! LLVMIR: %[[x:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[i4:.*]] = load i32, ptr %{{[0-9]}}, align 4
+! LLVMIR: %[[urem:.*]] = urem i32 %[[i4]], 8
+! LLVMIR: %[[r:.*]] = insertelement <8 x i16> %[[x]], i16 %[[v]], i32 %[[urem]]
+! LLVMIR: store <8 x i16> %[[r]], ptr %{{[0-9]}}, align 16
 
   r = vec_insert(v, x, i8)
-! CHECK-FIR: %[[v:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i16>
-! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[i8:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i64>
-! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[c:.*]] = arith.constant 8 : i64
-! CHECK-FIR: %[[urem:.*]] = llvm.urem %[[i8]], %[[c]] : i64
-! CHECK-FIR: %[[r:.*]] = vector.insertelement %[[v]], %[[vr]][%[[urem]] : i64] : vector<8xi16>
-! CHECK-FIR: %[[r_conv:.*]] = fir.convert %[[r]] : (vector<8xi16>) -> !fir.vector<8:i16>
-! CHECK-FIR: fir.store %[[r_conv]] to %{{[0-9]}} : !fir.ref<!fir.vector<8:i16>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i16>
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[i8:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i64>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(8 : i64) : i64
-! CHECK-LLVMIR: %[[urem:.*]] = llvm.urem %[[i8]], %[[c]] : i64
-! CHECK-LLVMIR: %[[r:.*]] = llvm.insertelement %[[v]], %[[x]][%[[urem]] : i64] : vector<8xi16>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr<vector<8xi16>>
-
-! CHECK: %[[v:.*]] = load i16, ptr %{{[0-9]}}, align 2
-! CHECK: %[[x:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[i8:.*]] = load i64, ptr %{{[0-9]}}, align 8
-! CHECK: %[[urem:.*]] = urem i64 %[[i8]], 8
-! CHECK: %[[r:.*]] = insertelement <8 x i16> %[[x]], i16 %[[v]], i64 %[[urem]]
-! CHECK: store <8 x i16> %[[r]], ptr %{{[0-9]}}, align 16
+
+! LLVMIR: %[[v:.*]] = load i16, ptr %{{[0-9]}}, align 2
+! LLVMIR: %[[x:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[i8:.*]] = load i64, ptr %{{[0-9]}}, align 8
+! LLVMIR: %[[urem:.*]] = urem i64 %[[i8]], 8
+! LLVMIR: %[[r:.*]] = insertelement <8 x i16> %[[x]], i16 %[[v]], i64 %[[urem]]
+! LLVMIR: store <8 x i16> %[[r]], ptr %{{[0-9]}}, align 16
 end subroutine vec_insert_testi16
 
 !CHECK-LABEL: vec_insert_testi32
@@ -475,111 +197,41 @@ subroutine vec_insert_testi32(v, x, i1, i2, i4, i8)
   integer(4) :: i4
   integer(8) :: i8
   r = vec_insert(v, x, i1)
-! CHECK-FIR: %[[v:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i32>
-! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[i1:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i8>
-! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[c:.*]] = arith.constant 4 : i8
-! CHECK-FIR: %[[urem:.*]] = llvm.urem %[[i1]], %[[c]] : i8
-! CHECK-FIR: %[[r:.*]] = vector.insertelement %[[v]], %[[vr]][%[[urem]] : i8] : vector<4xi32>
-! CHECK-FIR: %[[r_conv:.*]] = fir.convert %[[r]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: fir.store %[[r_conv]] to %{{[0-9]}} : !fir.ref<!fir.vector<4:i32>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i32>
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[i1:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(4 : i8) : i8
-! CHECK-LLVMIR: %[[urem:.*]] = llvm.urem %[[i1]], %[[c]] : i8
-! CHECK-LLVMIR: %[[r:.*]] = llvm.insertelement %[[v]], %[[x]][%[[urem]] : i8] : vector<4xi32>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr<vector<4xi32>>
-
-! CHECK: %[[v:.*]] = load i32, ptr %{{[0-9]}}, align 4
-! CHECK: %[[x:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[i1:.*]] = load i8, ptr %{{[0-9]}}, align 1
-! CHECK: %[[urem:.*]] = urem i8 %[[i1]], 4
-! CHECK: %[[r:.*]] = insertelement <4 x i32> %[[x]], i32 %[[v]], i8 %[[urem]]
-! CHECK: store <4 x i32> %[[r]], ptr %{{[0-9]}}, align 16
 
+! LLVMIR: %[[v:.*]] = load i32, ptr %{{[0-9]}}, align 4
+! LLVMIR: %[[x:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[i1:.*]] = load i8, ptr %{{[0-9]}}, align 1
+! LLVMIR: %[[urem:.*]] = urem i8 %[[i1]], 4
+! LLVMIR: %[[r:.*]] = insertelement <4 x i32> %[[x]], i32 %[[v]], i8 %[[urem]]
+! LLVMIR: store <4 x i32> %[[r]], ptr %{{[0-9]}}, align 16
 
   r = vec_insert(v, x, i2)
-! CHECK-FIR: %[[v:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i32>
-! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[i2:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i16>
-! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[c:.*]] = arith.constant 4 : i16
-! CHECK-FIR: %[[urem:.*]] = llvm.urem %[[i2]], %[[c]] : i16
-! CHECK-FIR: %[[r:.*]] = vector.insertelement %[[v]], %[[vr]][%[[urem]] : i16] : vector<4xi32>
-! CHECK-FIR: %[[r_conv:.*]] = fir.convert %[[r]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: fir.store %[[r_conv]] to %{{[0-9]}} : !fir.ref<!fir.vector<4:i32>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i32>
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[i2:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i16>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(4 : i16) : i16
-! CHECK-LLVMIR: %[[urem:.*]] = llvm.urem %[[i2]], %[[c]] : i16
-! CHECK-LLVMIR: %[[r:.*]] = llvm.insertelement %[[v]], %[[x]][%[[urem]] : i16] : vector<4xi32>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr<vector<4xi32>>
-
-! CHECK: %[[v:.*]] = load i32, ptr %{{[0-9]}}, align 4
-! CHECK: %[[x:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[i2:.*]] = load i16, ptr %{{[0-9]}}, align 2
-! CHECK: %[[urem:.*]] = urem i16 %[[i2]], 4
-! CHECK: %[[r:.*]] = insertelement <4 x i32> %[[x]], i32 %[[v]], i16 %[[urem]]
-! CHECK: store <4 x i32> %[[r]], ptr %{{[0-9]}}, align 16
+
+! LLVMIR: %[[v:.*]] = load i32, ptr %{{[0-9]}}, align 4
+! LLVMIR: %[[x:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[i2:.*]] = load i16, ptr %{{[0-9]}}, align 2
+! LLVMIR: %[[urem:.*]] = urem i16 %[[i2]], 4
+! LLVMIR: %[[r:.*]] = insertelement <4 x i32> %[[x]], i32 %[[v]], i16 %[[urem]]
+! LLVMIR: store <4 x i32> %[[r]], ptr %{{[0-9]}}, align 16
 
   r = vec_insert(v, x, i4)
-! CHECK-FIR: %[[v:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i32>
-! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[i4:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i32>
-! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[c:.*]] = arith.constant 4 : i32
-! CHECK-FIR: %[[urem:.*]] = llvm.urem %[[i4]], %[[c]] : i32
-! CHECK-FIR: %[[r:.*]] = vector.insertelement %[[v]], %[[vr]][%[[urem]] : i32] : vector<4xi32>
-! CHECK-FIR: %[[r_conv:.*]] = fir.convert %[[r]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: fir.store %[[r_conv]] to %{{[0-9]}} : !fir.ref<!fir.vector<4:i32>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i32>
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[i4:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i32>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(4 : i32) : i32
-! CHECK-LLVMIR: %[[urem:.*]] = llvm.urem %[[i4]], %[[c]] : i32
-! CHECK-LLVMIR: %[[r:.*]] = llvm.insertelement %[[v]], %[[x]][%[[urem]] : i32] : vector<4xi32>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr<vector<4xi32>>
-
-! CHECK: %[[v:.*]] = load i32, ptr %{{[0-9]}}, align 4
-! CHECK: %[[x:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[i4:.*]] = load i32, ptr %{{[0-9]}}, align 4
-! CHECK: %[[urem:.*]] = urem i32 %[[i4]], 4
-! CHECK: %[[r:.*]] = insertelement <4 x i32> %[[x]], i32 %[[v]], i32 %[[urem]]
-! CHECK: store <4 x i32> %[[r]], ptr %{{[0-9]}}, align 16
+
+! LLVMIR: %[[v:.*]] = load i32, ptr %{{[0-9]}}, align 4
+! LLVMIR: %[[x:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[i4:.*]] = load i32, ptr %{{[0-9]}}, align 4
+! LLVMIR: %[[urem:.*]] = urem i32 %[[i4]], 4
+! LLVMIR: %[[r:.*]] = insertelement <4 x i32> %[[x]], i32 %[[v]], i32 %[[urem]]
+! LLVMIR: store <4 x i32> %[[r]], ptr %{{[0-9]}}, align 16
 
   r = vec_insert(v, x, i8)
-! CHECK-FIR: %[[v:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i32>
-! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[i8:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i64>
-! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[c:.*]] = arith.constant 4 : i64
-! CHECK-FIR: %[[urem:.*]] = llvm.urem %[[i8]], %[[c]] : i64
-! CHECK-FIR: %[[r:.*]] = vector.insertelement %[[v]], %[[vr]][%[[urem]] : i64] : vector<4xi32>
-! CHECK-FIR: %[[r_conv:.*]] = fir.convert %[[r]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: fir.store %[[r_conv]] to %{{[0-9]}} : !fir.ref<!fir.vector<4:i32>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i32>
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[i8:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i64>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(4 : i64) : i64
-! CHECK-LLVMIR: %[[urem:.*]] = llvm.urem %[[i8]], %[[c]] : i64
-! CHECK-LLVMIR: %[[r:.*]] = llvm.insertelement %[[v]], %[[x]][%[[urem]] : i64] : vector<4xi32>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr<vector<4xi32>>
-
-! CHECK: %[[v:.*]] = load i32, ptr %{{[0-9]}}, align 4
-! CHECK: %[[x:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[i8:.*]] = load i64, ptr %{{[0-9]}}, align 8
-! CHECK: %[[urem:.*]] = urem i64 %[[i8]], 4
-! CHECK: %[[r:.*]] = insertelement <4 x i32> %[[x]], i32 %[[v]], i64 %[[urem]]
-! CHECK: store <4 x i32> %[[r]], ptr %{{[0-9]}}, align 16
-end subroutine vec_insert_testi32
 
+! LLVMIR: %[[v:.*]] = load i32, ptr %{{[0-9]}}, align 4
+! LLVMIR: %[[x:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[i8:.*]] = load i64, ptr %{{[0-9]}}, align 8
+! LLVMIR: %[[urem:.*]] = urem i64 %[[i8]], 4
+! LLVMIR: %[[r:.*]] = insertelement <4 x i32> %[[x]], i32 %[[v]], i64 %[[urem]]
+! LLVMIR: store <4 x i32> %[[r]], ptr %{{[0-9]}}, align 16
+end subroutine vec_insert_testi32
 
 !CHECK-LABEL: vec_insert_testi64
 subroutine vec_insert_testi64(v, x, i1, i2, i4, i8)
@@ -591,107 +243,38 @@ subroutine vec_insert_testi64(v, x, i1, i2, i4, i8)
   integer(4) :: i4
   integer(8) :: i8
   r = vec_insert(v, x, i1)
-! CHECK-FIR: %[[v:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i64>
-! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[i1:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i8>
-! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! CHECK-FIR: %[[c:.*]] = arith.constant 2 : i8
-! CHECK-FIR: %[[urem:.*]] = llvm.urem %[[i1]], %[[c]] : i8
-! CHECK-FIR: %[[r:.*]] = vector.insertelement %[[v]], %[[vr]][%[[urem]] : i8] : vector<2xi64>
-! CHECK-FIR: %[[r_conv:.*]] = fir.convert %[[r]] : (vector<2xi64>) -> !fir.vector<2:i64>
-! CHECK-FIR: fir.store %[[r_conv]] to %{{[0-9]}} : !fir.ref<!fir.vector<2:i64>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i64>
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[i1:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(2 : i8) : i8
-! CHECK-LLVMIR: %[[urem:.*]] = llvm.urem %[[i1]], %[[c]] : i8
-! CHECK-LLVMIR: %[[r:.*]] = llvm.insertelement %[[v]], %[[x]][%[[urem]] : i8] : vector<2xi64>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr<vector<2xi64>>
-
-! CHECK: %[[v:.*]] = load i64, ptr %{{[0-9]}}, align 8
-! CHECK: %[[x:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[i1:.*]] = load i8, ptr %{{[0-9]}}, align 1
-! CHECK: %[[urem:.*]] = urem i8 %[[i1]], 2
-! CHECK: %[[r:.*]] = insertelement <2 x i64> %[[x]], i64 %[[v]], i8 %[[urem]]
-! CHECK: store <2 x i64> %[[r]], ptr %{{[0-9]}}, align 16
 
+! LLVMIR: %[[v:.*]] = load i64, ptr %{{[0-9]}}, align 8
+! LLVMIR: %[[x:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[i1:.*]] = load i8, ptr %{{[0-9]}}, align 1
+! LLVMIR: %[[urem:.*]] = urem i8 %[[i1]], 2
+! LLVMIR: %[[r:.*]] = insertelement <2 x i64> %[[x]], i64 %[[v]], i8 %[[urem]]
+! LLVMIR: store <2 x i64> %[[r]], ptr %{{[0-9]}}, align 16
 
   r = vec_insert(v, x, i2)
-! CHECK-FIR: %[[v:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i64>
-! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[i2:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i16>
-! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! CHECK-FIR: %[[c:.*]] = arith.constant 2 : i16
-! CHECK-FIR: %[[urem:.*]] = llvm.urem %[[i2]], %[[c]] : i16
-! CHECK-FIR: %[[r:.*]] = vector.insertelement %[[v]], %[[vr]][%[[urem]] : i16] : vector<2xi64>
-! CHECK-FIR: %[[r_conv:.*]] = fir.convert %[[r]] : (vector<2xi64>) -> !fir.vector<2:i64>
-! CHECK-FIR: fir.store %[[r_conv]] to %{{[0-9]}} : !fir.ref<!fir.vector<2:i64>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i64>
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[i2:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i16>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(2 : i16) : i16
-! CHECK-LLVMIR: %[[urem:.*]] = llvm.urem %[[i2]], %[[c]] : i16
-! CHECK-LLVMIR: %[[r:.*]] = llvm.insertelement %[[v]], %[[x]][%[[urem]] : i16] : vector<2xi64>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr<vector<2xi64>>
-
-! CHECK: %[[v:.*]] = load i64, ptr %{{[0-9]}}, align 8
-! CHECK: %[[x:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[i2:.*]] = load i16, ptr %{{[0-9]}}, align 2
-! CHECK: %[[urem:.*]] = urem i16 %[[i2]], 2
-! CHECK: %[[r:.*]] = insertelement <2 x i64> %[[x]], i64 %[[v]], i16 %[[urem]]
-! CHECK: store <2 x i64> %[[r]], ptr %{{[0-9]}}, align 16
+
+! LLVMIR: %[[v:.*]] = load i64, ptr %{{[0-9]}}, align 8
+! LLVMIR: %[[x:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[i2:.*]] = load i16, ptr %{{[0-9]}}, align 2
+! LLVMIR: %[[urem:.*]] = urem i16 %[[i2]], 2
+! LLVMIR: %[[r:.*]] = insertelement <2 x i64> %[[x]], i64 %[[v]], i16 %[[urem]]
+! LLVMIR: store <2 x i64> %[[r]], ptr %{{[0-9]}}, align 16
 
   r = vec_insert(v, x, i4)
-! CHECK-FIR: %[[v:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i64>
-! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[i4:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i32>
-! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! CHECK-FIR: %[[c:.*]] = arith.constant 2 : i32
-! CHECK-FIR: %[[urem:.*]] = llvm.urem %[[i4]], %[[c]] : i32
-! CHECK-FIR: %[[r:.*]] = vector.insertelement %[[v]], %[[vr]][%[[urem]] : i32] : vector<2xi64>
-! CHECK-FIR: %[[r_conv:.*]] = fir.convert %[[r]] : (vector<2xi64>) -> !fir.vector<2:i64>
-! CHECK-FIR: fir.store %[[r_conv]] to %{{[0-9]}} : !fir.ref<!fir.vector<2:i64>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i64>
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[i4:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i32>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(2 : i32) : i32
-! CHECK-LLVMIR: %[[urem:.*]] = llvm.urem %[[i4]], %[[c]] : i32
-! CHECK-LLVMIR: %[[r:.*]] = llvm.insertelement %[[v]], %[[x]][%[[urem]] : i32] : vector<2xi64>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr<vector<2xi64>>
-
-! CHECK: %[[v:.*]] = load i64, ptr %{{[0-9]}}, align 8
-! CHECK: %[[x:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[i4:.*]] = load i32, ptr %{{[0-9]}}, align 4
-! CHECK: %[[urem:.*]] = urem i32 %[[i4]], 2
-! CHECK: %[[r:.*]] = insertelement <2 x i64> %[[x]], i64 %[[v]], i32 %[[urem]]
-! CHECK: store <2 x i64> %[[r]], ptr %{{[0-9]}}, align 16
+
+! LLVMIR: %[[v:.*]] = load i64, ptr %{{[0-9]}}, align 8
+! LLVMIR: %[[x:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[i4:.*]] = load i32, ptr %{{[0-9]}}, align 4
+! LLVMIR: %[[urem:.*]] = urem i32 %[[i4]], 2
+! LLVMIR: %[[r:.*]] = insertelement <2 x i64> %[[x]], i64 %[[v]], i32 %[[urem]]
+! LLVMIR: store <2 x i64> %[[r]], ptr %{{[0-9]}}, align 16
 
   r = vec_insert(v, x, i8)
-! CHECK-FIR: %[[v:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i64>
-! CHECK-FIR: %[[x:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[i8:.*]] = fir.load %arg{{[0-9]}} : !fir.ref<i64>
-! CHECK-FIR: %[[vr:.*]] = fir.convert %[[x]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! CHECK-FIR: %[[c:.*]] = arith.constant 2 : i64
-! CHECK-FIR: %[[urem:.*]] = llvm.urem %[[i8]], %[[c]] : i64
-! CHECK-FIR: %[[r:.*]] = vector.insertelement %[[v]], %[[vr]][%[[urem]] : i64] : vector<2xi64>
-! CHECK-FIR: %[[r_conv:.*]] = fir.convert %[[r]] : (vector<2xi64>) -> !fir.vector<2:i64>
-! CHECK-FIR: fir.store %[[r_conv]] to %{{[0-9]}} : !fir.ref<!fir.vector<2:i64>>
-
-! CHECK-LLVMIR: %[[v:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i64>
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[i8:.*]] = llvm.load %arg{{[0-9]}} : !llvm.ptr<i64>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(2 : i64) : i64
-! CHECK-LLVMIR: %[[urem:.*]] = llvm.urem %[[i8]], %[[c]] : i64
-! CHECK-LLVMIR: %[[r:.*]] = llvm.insertelement %[[v]], %[[x]][%[[urem]] : i64] : vector<2xi64>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{[0-9]}} : !llvm.ptr<vector<2xi64>>
-
-! CHECK: %[[v:.*]] = load i64, ptr %{{[0-9]}}, align 8
-! CHECK: %[[x:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[i8:.*]] = load i64, ptr %{{[0-9]}}, align 8
-! CHECK: %[[urem:.*]] = urem i64 %[[i8]], 2
-! CHECK: %[[r:.*]] = insertelement <2 x i64> %[[x]], i64 %[[v]], i64 %[[urem]]
-! CHECK: store <2 x i64> %[[r]], ptr %{{[0-9]}}, align 16
+
+! LLVMIR: %[[v:.*]] = load i64, ptr %{{[0-9]}}, align 8
+! LLVMIR: %[[x:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[i8:.*]] = load i64, ptr %{{[0-9]}}, align 8
+! LLVMIR: %[[urem:.*]] = urem i64 %[[i8]], 2
+! LLVMIR: %[[r:.*]] = insertelement <2 x i64> %[[x]], i64 %[[v]], i64 %[[urem]]
+! LLVMIR: store <2 x i64> %[[r]], ptr %{{[0-9]}}, align 16
 end subroutine vec_insert_testi64

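All of the vec_insert hunks above check one and the same lowering pattern: the index argument is reduced with urem against the vector's element count before being fed to insertelement. As a sketch of how a single case exercises the new HLFIR path (hypothetical standalone file; the flags are taken from the RUN lines in this patch, and the expected IR lines from the LLVMIR checks above):

  ! insert-sketch.f90 -- a minimal sketch, assuming a PowerPC-enabled flang
  ! where the vector type and vec_insert intrinsic are available; compile with:
  !   flang_fc1 -flang-experimental-hlfir -emit-llvm insert-sketch.f90 \
  !     -triple ppc64le-unknown-linux -o -
  subroutine insert_sketch(v, x, i2)
    integer(4) :: v
    vector(integer(4)) :: x, r
    integer(2) :: i2
    ! Insert v into x at position i2 (mod 4). Per the LLVMIR checks above,
    ! this is expected to lower to:
    !   %urem = urem i16 %i2, 4
    !   %r = insertelement <4 x i32> %x, i32 %v, i16 %urem
    r = vec_insert(v, x, i2)
  end subroutine insert_sketch
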
diff --git a/flang/test/Lower/PowerPC/ppc-vec-load-elem-order.f90 b/flang/test/Lower/PowerPC/ppc-vec-load-elem-order.f90
index f4e7f7b1db41dcf..214fe423628d6e5 100644
--- a/flang/test/Lower/PowerPC/ppc-vec-load-elem-order.f90
+++ b/flang/test/Lower/PowerPC/ppc-vec-load-elem-order.f90
@@ -1,5 +1,4 @@
-! RUN: %flang_fc1 -emit-fir %s -fno-ppc-native-vector-element-order -triple ppc64le-unknown-linux -o - | FileCheck --check-prefixes="FIR" %s
-! RUN: %flang_fc1 -emit-llvm %s -fno-ppc-native-vector-element-order -triple ppc64le-unknown-linux -o - | FileCheck --check-prefixes="LLVMIR" %s
+! RUN: %flang_fc1 -flang-experimental-hlfir -emit-llvm %s -fno-ppc-native-vector-element-order -triple ppc64le-unknown-linux -o - | FileCheck --check-prefixes="LLVMIR" %s
 ! REQUIRES: target=powerpc{{.*}}
 
 !-------------------
@@ -12,16 +11,6 @@ subroutine vec_ld_testi8(arg1, arg2, res)
   vector(integer(1)) :: arg2, res
   res = vec_ld(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i8>
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.vector<16:i8>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i8) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
-! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<4xi32> to vector<16xi8>
-! FIR: %[[undefv:.*]] = fir.undefined vector<16xi8>
-! FIR: %[[shflv:.*]] = vector.shuffle %[[bc]], %[[undefv]] [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] : vector<16xi8>, vector<16xi8>
-! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<16xi8>) -> !fir.vector<16:i8>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:i8>>
-
 ! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]]
 ! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %[[addr]])
@@ -36,16 +25,6 @@ subroutine vec_ld_testi16(arg1, arg2, res)
   vector(integer(2)) :: arg2, res
   res = vec_ld(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.vector<8:i16>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
-! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<4xi32> to vector<8xi16>
-! FIR: %[[undefv:.*]] = fir.undefined vector<8xi16>
-! FIR: %[[shflv:.*]] = vector.shuffle %[[bc]], %[[undefv]] [7, 6, 5, 4, 3, 2, 1, 0] : vector<8xi16>, vector<8xi16>
-! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<8xi16>) -> !fir.vector<8:i16>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<8:i16>>
-
 ! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]]
 ! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %[[addr]])
@@ -60,15 +39,6 @@ subroutine vec_ld_testi32(arg1, arg2, res)
   vector(integer(4)) :: arg2, res
   res = vec_ld(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i32>
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.vector<4:i32>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
-! FIR: %[[undefv:.*]] = fir.undefined vector<4xi32>
-! FIR: %[[shflv:.*]] = vector.shuffle %[[ld]], %[[undefv]] [3, 2, 1, 0] : vector<4xi32>, vector<4xi32>
-! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:i32>>
-
 ! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]]
 ! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %[[addr]])
@@ -82,17 +52,6 @@ subroutine vec_ld_testf32(arg1, arg2, res)
   vector(real(4)) :: arg2, res
   res = vec_ld(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
-! FIR: %[[i4:.*]] = fir.convert %[[arg1]] : (i64) -> i32
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.vector<4:f32>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[i4]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
-! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<4xi32> to vector<4xf32>
-! FIR: %[[undefv:.*]] = fir.undefined vector<4xf32>
-! FIR: %[[shflv:.*]] = vector.shuffle %[[bc]], %[[undefv]] [3, 2, 1, 0] : vector<4xf32>, vector<4xf32>
-! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:f32>>
-
 ! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
 ! LLVMIR: %[[i4:.*]] = trunc i64 %[[arg1]] to i32
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[i4]]
@@ -108,15 +67,6 @@ subroutine vec_ld_testu32(arg1, arg2, res)
   vector(unsigned(4)) :: arg2, res
   res = vec_ld(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i8>
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.vector<4:ui32>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i8) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
-! FIR: %[[undefv:.*]] = fir.undefined vector<4xi32>
-! FIR: %[[shflv:.*]] = vector.shuffle %[[ld]], %[[undefv]] [3, 2, 1, 0] : vector<4xi32>, vector<4xi32>
-! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<4xi32>) -> !fir.vector<4:ui32>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:ui32>>
-
 ! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]]
 ! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %[[addr]])
@@ -131,15 +81,6 @@ subroutine vec_ld_testi32a(arg1, arg2, res)
   vector(integer(4)) :: res
   res = vec_ld(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i32>
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<10xi32>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
-! FIR: %[[undefv:.*]] = fir.undefined vector<4xi32>
-! FIR: %[[shflv:.*]] = vector.shuffle %[[ld]], %[[undefv]] [3, 2, 1, 0] : vector<4xi32>, vector<4xi32>
-! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:i32>>
-
 ! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]]
 ! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %[[addr]])
@@ -154,17 +95,6 @@ subroutine vec_ld_testf32av(arg1, arg2, res)
   vector(real(4)) :: res
   res = vec_ld(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
-! FIR: %[[i4:.*]] = fir.convert %[[arg1]] : (i64) -> i32
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x4x8x!fir.vector<4:f32>>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[i4]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
-! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<4xi32> to vector<4xf32>
-! FIR: %[[undefv:.*]] = fir.undefined vector<4xf32>
-! FIR: %[[shflv:.*]] = vector.shuffle %[[bc]], %[[undefv]] [3, 2, 1, 0] : vector<4xf32>, vector<4xf32>
-! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:f32>>
-
 ! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
 ! LLVMIR: %[[i4:.*]] = trunc i64 %[[arg1]] to i32
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[i4]]
@@ -181,16 +111,6 @@ subroutine vec_ld_testi32s(arg1, arg2, res)
   vector(real(4)) :: res
   res = vec_ld(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i32>
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<f32>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
-! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<4xi32> to vector<4xf32>
-! FIR: %[[undefv:.*]] = fir.undefined vector<4xf32>
-! FIR: %[[shflv:.*]] = vector.shuffle %[[bc]], %[[undefv]] [3, 2, 1, 0] : vector<4xf32>, vector<4xf32>
-! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:f32>>
-
 ! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]]
 ! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %[[addr]])
@@ -210,15 +130,6 @@ subroutine vec_lde_testi8s(arg1, arg2, res)
   vector(integer(1)) :: res
   res = vec_lde(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i8>
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<i8>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i8) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvebx(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<16xi8>
-! FIR: %[[undefv:.*]] = fir.undefined vector<16xi8>
-! FIR: %[[shflv:.*]] = vector.shuffle %[[ld]], %[[undefv]] [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] : vector<16xi8>, vector<16xi8>
-! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<16xi8>) -> !fir.vector<16:i8>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:i8>>
-
 ! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]]
 ! LLVMIR: %[[ld:.*]] = call <16 x i8> @llvm.ppc.altivec.lvebx(ptr %[[addr]])
@@ -233,15 +144,6 @@ subroutine vec_lde_testi16a(arg1, arg2, res)
   vector(integer(2)) :: res
   res = vec_lde(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x11x7xi16>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvehx(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<8xi16>
-! FIR: %[[undefv:.*]] = fir.undefined vector<8xi16>
-! FIR: %[[shflv:.*]] = vector.shuffle %[[ld]], %[[undefv]] [7, 6, 5, 4, 3, 2, 1, 0] : vector<8xi16>, vector<8xi16>
-! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<8xi16>) -> !fir.vector<8:i16>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<8:i16>>
-
 ! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]]
 ! LLVMIR: %[[ld:.*]] = call <8 x i16> @llvm.ppc.altivec.lvehx(ptr %[[addr]])
@@ -256,15 +158,6 @@ subroutine vec_lde_testi32a(arg1, arg2, res)
   vector(integer(4)) :: res
   res = vec_lde(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i32>
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<5xi32>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvewx(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
-! FIR: %[[undefv:.*]] = fir.undefined vector<4xi32>
-! FIR: %[[shflv:.*]] = vector.shuffle %[[ld]], %[[undefv]] [3, 2, 1, 0] : vector<4xi32>, vector<4xi32>
-! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:i32>>
-
 ! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]]
 ! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.altivec.lvewx(ptr %[[addr]])
@@ -279,16 +172,6 @@ subroutine vec_lde_testf32a(arg1, arg2, res)
   vector(real(4)) :: res
   res = vec_lde(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<11xf32>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvewx(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
-! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<4xi32> to vector<4xf32>
-! FIR: %[[undefv:.*]] = fir.undefined vector<4xf32>
-! FIR: %[[shflv:.*]] = vector.shuffle %[[bc]], %[[undefv]] [3, 2, 1, 0] : vector<4xf32>, vector<4xf32>
-! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:f32>>
-
 ! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]]
 ! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.altivec.lvewx(ptr %[[addr]])
@@ -308,17 +191,6 @@ subroutine vec_lvsl_testi8s(arg1, arg2, res)
   vector(unsigned(1)) :: res
   res = vec_lvsl(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i8>
-! FIR: %[[arg1i64:.*]] = fir.convert %[[arg1]] : (i8) -> i64
-! FIR: %[[fiveSix:.*]] = arith.constant 56 : i64
-! FIR: %[[lshft:.*]] = arith.shli %[[arg1i64]], %[[fiveSix]] : i64
-! FIR: %[[rshft:.*]] = arith.shrsi %[[lshft]], %[[fiveSix]] : i64
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<i8>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[rshft]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvsl(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<16xi8>
-! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:ui8>>
-
 ! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1
 ! LLVMIR: %[[iext:.*]] = sext i8 %[[arg1]] to i64
 ! LLVMIR: %[[lshft:.*]] = shl i64 %[[iext]], 56
@@ -335,17 +207,6 @@ subroutine vec_lvsl_testi16a(arg1, arg2, res)
   vector(unsigned(1)) :: res
   res = vec_lvsl(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
-! FIR: %[[arg1i64:.*]] = fir.convert %[[arg1]] : (i16) -> i64
-! FIR: %[[fiveSix:.*]] = arith.constant 56 : i64
-! FIR: %[[lshft:.*]] = arith.shli %[[arg1i64]], %[[fiveSix]] : i64
-! FIR: %[[rshft:.*]] = arith.shrsi %[[lshft]], %[[fiveSix]] : i64
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4xi16>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[rshft]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvsl(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<16xi8>
-! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:ui8>>
-
 ! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2
 ! LLVMIR: %[[iext:.*]] = sext i16 %[[arg1]] to i64
 ! LLVMIR: %[[lshft:.*]] = shl i64 %[[iext]], 56
@@ -362,17 +223,6 @@ subroutine vec_lvsl_testi32a(arg1, arg2, res)
   vector(unsigned(1)) :: res
   res = vec_lvsl(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i32>
-! FIR: %[[arg1i64:.*]] = fir.convert %[[arg1]] : (i32) -> i64
-! FIR: %[[fiveSix:.*]] = arith.constant 56 : i64
-! FIR: %[[lshft:.*]] = arith.shli %[[arg1i64]], %[[fiveSix]] : i64
-! FIR: %[[rshft:.*]] = arith.shrsi %[[lshft]], %[[fiveSix]] : i64
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<11x3x4xi32>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[rshft]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvsl(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<16xi8>
-! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:ui8>>
-
 ! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4
 ! LLVMIR: %[[iext:.*]] = sext i32 %[[arg1]] to i64
 ! LLVMIR: %[[lshft:.*]] = shl i64 %[[iext]], 56
@@ -389,16 +239,6 @@ subroutine vec_lvsl_testf32a(arg1, arg2, res)
   vector(unsigned(1)) :: res
   res = vec_lvsl(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
-! FIR: %[[fiveSix:.*]] = arith.constant 56 : i64
-! FIR: %[[lshft:.*]] = arith.shli %[[arg1]], %[[fiveSix]] : i64
-! FIR: %[[rshft:.*]] = arith.shrsi %[[lshft]], %[[fiveSix]] : i64
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<51xf32>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[rshft]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvsl(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<16xi8>
-! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:ui8>>
-
 ! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
 ! LLVMIR: %[[lshft:.*]] = shl i64 %[[arg1]], 56
 ! LLVMIR: %[[rshft:.*]] = ashr i64 %[[lshft]], 56
@@ -418,17 +258,6 @@ subroutine vec_lvsr_testi8s(arg1, arg2, res)
   vector(unsigned(1)) :: res
   res = vec_lvsr(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i8>
-! FIR: %[[arg1i64:.*]] = fir.convert %[[arg1]] : (i8) -> i64
-! FIR: %[[fiveSix:.*]] = arith.constant 56 : i64
-! FIR: %[[lshft:.*]] = arith.shli %[[arg1i64]], %[[fiveSix]] : i64
-! FIR: %[[rshft:.*]] = arith.shrsi %[[lshft]], %[[fiveSix]] : i64
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<i8>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[rshft]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvsr(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<16xi8>
-! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:ui8>>
-
 ! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1
 ! LLVMIR: %[[iext:.*]] = sext i8 %[[arg1]] to i64
 ! LLVMIR: %[[lshft:.*]] = shl i64 %[[iext]], 56
@@ -445,17 +274,6 @@ subroutine vec_lvsr_testi16a(arg1, arg2, res)
   vector(unsigned(1)) :: res
   res = vec_lvsr(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
-! FIR: %[[arg1i64:.*]] = fir.convert %[[arg1]] : (i16) -> i64
-! FIR: %[[fiveSix:.*]] = arith.constant 56 : i64
-! FIR: %[[lshft:.*]] = arith.shli %[[arg1i64]], %[[fiveSix]] : i64
-! FIR: %[[rshft:.*]] = arith.shrsi %[[lshft]], %[[fiveSix]] : i64
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<41xi16>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[rshft]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvsr(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<16xi8>
-! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:ui8>>
-
 ! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2
 ! LLVMIR: %[[iext:.*]] = sext i16 %[[arg1]] to i64
 ! LLVMIR: %[[lshft:.*]] = shl i64 %[[iext]], 56
@@ -472,17 +290,6 @@ subroutine vec_lvsr_testi32a(arg1, arg2, res)
   vector(unsigned(1)) :: res
   res = vec_lvsr(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i32>
-! FIR: %[[arg1i64:.*]] = fir.convert %[[arg1]] : (i32) -> i64
-! FIR: %[[fiveSix:.*]] = arith.constant 56 : i64
-! FIR: %[[lshft:.*]] = arith.shli %[[arg1i64]], %[[fiveSix]] : i64
-! FIR: %[[rshft:.*]] = arith.shrsi %[[lshft]], %[[fiveSix]] : i64
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<23x31x47xi32>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[rshft]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvsr(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<16xi8>
-! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:ui8>>
-
 ! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4
 ! LLVMIR: %[[iext:.*]] = sext i32 %[[arg1]] to i64
 ! LLVMIR: %[[lshft:.*]] = shl i64 %[[iext]], 56
@@ -499,16 +306,6 @@ subroutine vec_lvsr_testf32a(arg1, arg2, res)
   vector(unsigned(1)) :: res
   res = vec_lvsr(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
-! FIR: %[[fiveSix:.*]] = arith.constant 56 : i64
-! FIR: %[[lshft:.*]] = arith.shli %[[arg1]], %[[fiveSix]] : i64
-! FIR: %[[rshft:.*]] = arith.shrsi %[[lshft]], %[[fiveSix]] : i64
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<f32>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[rshft]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvsr(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<16xi8>
-! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:ui8>>
-
 ! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
 ! LLVMIR: %[[lshft:.*]] = shl i64 %[[arg1]], 56
 ! LLVMIR: %[[rshft:.*]] = ashr i64 %[[lshft]], 56
@@ -528,13 +325,6 @@ subroutine vec_lxv_testi8a(arg1, arg2, res)
   vector(integer(1)) :: res
   res = vec_lxv(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i8>
-! FIR: %[[ref:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4xi8>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[ref]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i8) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
-! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<16xi8>) -> !fir.vector<16:i8>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:i8>>
-
 ! LLVMIR: %[[offset:.*]] = load i8, ptr %0, align 1
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[offset]]
 ! LLVMIR: %[[res:.*]] = load <16 x i8>, ptr %[[addr]], align 1
@@ -548,13 +338,6 @@ subroutine vec_lxv_testi16a(arg1, arg2, res)
   vector(integer(2)) :: res
   res = vec_lxv(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
-! FIR: %[[ref:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x4x8xi16>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[ref]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
-! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<8xi16>) -> !fir.vector<8:i16>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<8:i16>>
-
 ! LLVMIR: %[[offset:.*]] = load i16, ptr %0, align 2
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[offset]]
 ! LLVMIR: %[[res:.*]] = load <8 x i16>, ptr %[[addr]], align 1
@@ -568,13 +351,6 @@ subroutine vec_lxv_testi32a(arg1, arg2, res)
   vector(integer(4)) :: res
   res = vec_lxv(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i32>
-! FIR: %[[ref:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x4x8xi32>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[ref]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
-! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:i32>>
-
 ! LLVMIR: %[[offset:.*]] = load i32, ptr %0, align 4
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[offset]]
 ! LLVMIR: %[[res:.*]] = load <4 x i32>, ptr %[[addr]], align 1
@@ -588,13 +364,6 @@ subroutine vec_lxv_testf32a(arg1, arg2, res)
   vector(real(4)) :: res
   res = vec_lxv(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
-! FIR: %[[ref:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4xf32>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[ref]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
-! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:f32>>
-
 ! LLVMIR: %[[offset:.*]] = load i16, ptr %0, align 2
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[offset]]
 ! LLVMIR: %[[res:.*]] = load <4 x float>, ptr %[[addr]], align 1
@@ -608,13 +377,6 @@ subroutine vec_lxv_testf64a(arg1, arg2, res)
   vector(real(8)) :: res
   res = vec_lxv(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
-! FIR: %[[ref:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4xf64>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[ref]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
-! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<2xf64>) -> !fir.vector<2:f64>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<2:f64>>
-
 ! LLVMIR: %[[offset:.*]] = load i64, ptr %0, align 8
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[offset]]
 ! LLVMIR: %[[res:.*]] = load <2 x double>, ptr %[[addr]], align 1
@@ -632,14 +394,6 @@ subroutine vec_xl_testi8a(arg1, arg2, res)
   vector(integer(1)) :: res
   res = vec_xl(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i8>
-! FIR: %[[ref:.*]] = fir.convert %arg1 : (!fir.ref<i8>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[ref]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i8) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ref2:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
-! FIR: %[[undefv:.*]] = fir.undefined vector<16xi8>
-! FIR: %[[shflv:.*]] = vector.shuffle %[[ref2]], %[[undefv]] [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] : vector<16xi8>, vector<16xi8>
-! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<16xi8>) -> !fir.vector<16:i8>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:i8>>
   
 ! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]]
@@ -655,15 +409,6 @@ subroutine vec_xl_testi16a(arg1, arg2, res)
   vector(integer(2)) :: res
   res = vec_xl(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
-! FIR: %[[ref:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x8xi16>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[ref]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ref2:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
-! FIR: %[[undefv:.*]] = fir.undefined vector<8xi16>
-! FIR: %[[shflv:.*]] = vector.shuffle %[[ref2]], %[[undefv]] [7, 6, 5, 4, 3, 2, 1, 0] : vector<8xi16>, vector<8xi16>
-! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<8xi16>) -> !fir.vector<8:i16>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<8:i16>>
-
 ! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]]
 ! LLVMIR: %[[ld:.*]] = load <8 x i16>, ptr %[[addr]], align 1
@@ -678,13 +423,6 @@ subroutine vec_xl_testi32a(arg1, arg2, res)
   vector(integer(4)) :: res
   res = vec_xl(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i32>
-! FIR: %[[ref:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x4x8xi32>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[ref]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvw4x.be(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
-! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:i32>>
-
 ! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]]
 ! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.vsx.lxvw4x.be(ptr %[[addr]])
@@ -698,14 +436,6 @@ subroutine vec_xl_testi64a(arg1, arg2, res)
   vector(integer(8)) :: res
   res = vec_xl(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
-! FIR: %[[ref:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x4x1xi64>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[ref]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvd2x.be(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<2xf64>
-! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<2xf64> to vector<2xi64>
-! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<2xi64>) -> !fir.vector<2:i64>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<2:i64>>
-
 ! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]]
 ! LLVMIR: %[[ld:.*]] = call contract <2 x double> @llvm.ppc.vsx.lxvd2x.be(ptr %[[addr]])
@@ -720,14 +450,6 @@ subroutine vec_xl_testf32a(arg1, arg2, res)
   vector(real(4)) :: res
   res = vec_xl(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
-! FIR: %[[ref:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4xf32>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[ref]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvw4x.be(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
-! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<4xi32> to vector<4xf32>
-! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:f32>>
-
 ! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]]
 ! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.vsx.lxvw4x.be(ptr %[[addr]])
@@ -742,13 +464,6 @@ subroutine vec_xl_testf64a(arg1, arg2, res)
   vector(real(8)) :: res
   res = vec_xl(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
-! FIR: %[[ref:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2xf64>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[ref]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvd2x.be(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<2xf64>
-! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<2xf64>) -> !fir.vector<2:f64>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<2:f64>>
-
 ! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]]
 ! LLVMIR: %[[ld:.*]] = call contract <2 x double> @llvm.ppc.vsx.lxvd2x.be(ptr %[[addr]])
@@ -766,14 +481,6 @@ subroutine vec_xl_be_testi8a(arg1, arg2, res)
   vector(integer(1)) :: res
   res = vec_xl_be(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i8>
-! FIR: %[[ref:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x4x8xi8>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[ref]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i8) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ref2:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
-! FIR: %[[undefv:.*]] = fir.undefined vector<16xi8>
-! FIR: %[[shflv:.*]] = vector.shuffle %[[ref2]], %[[undefv]] [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] : vector<16xi8>, vector<16xi8>
-! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<16xi8>) -> !fir.vector<16:i8>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:i8>>
   
 ! LLVMIR: %4 = load i8, ptr %0, align 1
 ! LLVMIR: %5 = getelementptr i8, ptr %1, i8 %4
@@ -789,15 +496,6 @@ subroutine vec_xl_be_testi16a(arg1, arg2, res)
   vector(integer(2)) :: res
   res = vec_xl_be(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
-! FIR: %[[ref:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<8x2xi16>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[ref]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ref2:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
-! FIR: %[[undefv:.*]] = fir.undefined vector<8xi16>
-! FIR: %[[shflv:.*]] = vector.shuffle %[[ref2]], %[[undefv]] [7, 6, 5, 4, 3, 2, 1, 0] : vector<8xi16>, vector<8xi16>
-! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<8xi16>) -> !fir.vector<8:i16>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<8:i16>>
-
 ! LLVMIR: %4 = load i16, ptr %0, align 2
 ! LLVMIR: %5 = getelementptr i8, ptr %1, i16 %4
 ! LLVMIR: %6 = load <8 x i16>, ptr %5, align 1
@@ -812,15 +510,6 @@ subroutine vec_xl_be_testi32a(arg1, arg2, res)
   vector(integer(4)) :: res
   res = vec_xl_be(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i32>
-! FIR: %[[ref:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x4xi32>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[ref]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ref2:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
-! FIR: %[[undefv:.*]] = fir.undefined vector<4xi32>
-! FIR: %[[shflv:.*]] = vector.shuffle %[[ref2]], %[[undefv]] [3, 2, 1, 0] : vector<4xi32>, vector<4xi32>
-! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:i32>>
-
 ! LLVMIR: %4 = load i32, ptr %0, align 4
 ! LLVMIR: %5 = getelementptr i8, ptr %1, i32 %4
 ! LLVMIR: %6 = load <4 x i32>, ptr %5, align 1
@@ -835,15 +524,6 @@ subroutine vec_xl_be_testi64a(arg1, arg2, res)
   vector(integer(8)) :: res
   res = vec_xl_be(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
-! FIR: %[[ref:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x4x8xi64>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[ref]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ref2:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
-! FIR: %[[undefv:.*]] = fir.undefined vector<2xi64>
-! FIR: %[[shflv:.*]] = vector.shuffle %[[ref2]], %[[undefv]] [1, 0] : vector<2xi64>, vector<2xi64>
-! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<2xi64>) -> !fir.vector<2:i64>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<2:i64>>
-
 ! LLVMIR: %4 = load i64, ptr %0, align 8
 ! LLVMIR: %5 = getelementptr i8, ptr %1, i64 %4
 ! LLVMIR: %6 = load <2 x i64>, ptr %5, align 1
@@ -858,15 +538,6 @@ subroutine vec_xl_be_testf32a(arg1, arg2, res)
   vector(real(4)) :: res
   res = vec_xl_be(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
-! FIR: %[[ref:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4xf32>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[ref]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ref2:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
-! FIR: %[[undefv:.*]] = fir.undefined vector<4xf32>
-! FIR: %[[shflv:.*]] = vector.shuffle %[[ref2]], %[[undefv]] [3, 2, 1, 0] : vector<4xf32>, vector<4xf32>
-! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:f32>>
-
 ! LLVMIR: %4 = load i16, ptr %0, align 2
 ! LLVMIR: %5 = getelementptr i8, ptr %1, i16 %4
 ! LLVMIR: %6 = load <4 x float>, ptr %5, align 1
@@ -881,15 +552,6 @@ subroutine vec_xl_be_testf64a(arg1, arg2, res)
   vector(real(8)) :: res
   res = vec_xl_be(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
-! FIR: %[[ref:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4xf64>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[ref]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ref2:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
-! FIR: %[[undefv:.*]] = fir.undefined vector<2xf64>
-! FIR: %[[shflv:.*]] = vector.shuffle %[[ref2]], %[[undefv]] [1, 0] : vector<2xf64>, vector<2xf64>
-! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<2xf64>) -> !fir.vector<2:f64>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<2:f64>>
-
 ! LLVMIR: %4 = load i64, ptr %0, align 8
 ! LLVMIR: %5 = getelementptr i8, ptr %1, i64 %4
 ! LLVMIR: %6 = load <2 x double>, ptr %5, align 1
@@ -908,14 +570,6 @@ subroutine vec_xld2_testi8a(arg1, arg2, res)
   vector(integer(1)) :: res
   res = vec_xld2(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i8>
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4x!fir.vector<16:i8>>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i8) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvd2x.be(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<2xf64>
-! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<2xf64> to vector<16xi8>
-! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<16xi8>) -> !fir.vector<16:i8>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:i8>>
-
 ! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]]
 ! LLVMIR: %[[ld:.*]] = call contract <2 x double> @llvm.ppc.vsx.lxvd2x.be(ptr %[[addr]])
@@ -930,14 +584,6 @@ subroutine vec_xld2_testi16a(arg1, arg2, res)
   vector(integer(2)) :: res
   res = vec_xld2(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4x!fir.vector<8:i16>>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvd2x.be(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<2xf64>
-! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<2xf64> to vector<8xi16>
-! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<8xi16>) -> !fir.vector<8:i16>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<8:i16>>
-
 ! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]]
 ! LLVMIR: %[[ld:.*]] = call contract <2 x double> @llvm.ppc.vsx.lxvd2x.be(ptr %[[addr]])
@@ -952,14 +598,6 @@ subroutine vec_xld2_testi32a(arg1, arg2, res)
   vector(integer(4)) :: res
   res = vec_xld2(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i32>
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<11x!fir.vector<4:i32>>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvd2x.be(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<2xf64>
-! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<2xf64> to vector<4xi32>
-! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:i32>>
-
 ! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]]
 ! LLVMIR: %[[ld:.*]] = call contract <2 x double> @llvm.ppc.vsx.lxvd2x.be(ptr %[[addr]])
@@ -974,14 +612,6 @@ subroutine vec_xld2_testi64a(arg1, arg2, res)
   vector(integer(8)) :: res
   res = vec_xld2(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<31x7x!fir.vector<2:i64>>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvd2x.be(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<2xf64>
-! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<2xf64> to vector<2xi64>
-! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<2xi64>) -> !fir.vector<2:i64>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<2:i64>>
-
 ! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]]
 ! LLVMIR: %[[ld:.*]] = call contract <2 x double> @llvm.ppc.vsx.lxvd2x.be(ptr %[[addr]])
@@ -996,14 +626,6 @@ subroutine vec_xld2_testf32a(arg1, arg2, res)
   vector(real(4)) :: res
   res = vec_xld2(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<5x!fir.vector<4:f32>>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvd2x.be(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<2xf64>
-! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<2xf64> to vector<4xf32>
-! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:f32>>
-
 ! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]]
 ! LLVMIR: %[[ld:.*]] = call contract <2 x double> @llvm.ppc.vsx.lxvd2x.be(ptr %[[addr]])
@@ -1018,13 +640,6 @@ subroutine vec_xld2_testf64a(arg1, arg2, res)
   vector(real(8)) :: res
   res = vec_xld2(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4x!fir.vector<2:f64>>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvd2x.be(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<2xf64>
-! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<2xf64>) -> !fir.vector<2:f64>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<2:f64>>
-
 ! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]]
 ! LLVMIR: %[[ld:.*]] = call contract <2 x double> @llvm.ppc.vsx.lxvd2x.be(ptr %[[addr]])
@@ -1042,14 +657,6 @@ subroutine vec_xlw4_testi8a(arg1, arg2, res)
   vector(integer(1)) :: res
   res = vec_xlw4(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i8>
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x11x37x!fir.vector<16:i8>>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i8) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvw4x.be(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
-! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<4xi32> to vector<16xi8>
-! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<16xi8>) -> !fir.vector<16:i8>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:i8>>
-
 ! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]]
 ! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.vsx.lxvw4x.be(ptr %[[addr]])
@@ -1064,14 +671,6 @@ subroutine vec_xlw4_testi16a(arg1, arg2, res)
   vector(integer(2)) :: res
   res = vec_xlw4(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x8x!fir.vector<8:i16>>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvw4x.be(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
-! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<4xi32> to vector<8xi16>
-! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<8xi16>) -> !fir.vector<8:i16>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<8:i16>>
-
 ! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]]
 ! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.vsx.lxvw4x.be(ptr %[[addr]])
@@ -1086,13 +685,6 @@ subroutine vec_xlw4_testu32a(arg1, arg2, res)
   vector(unsigned(4)) :: res
   res = vec_xlw4(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i32>
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<8x4x!fir.vector<4:ui32>>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvw4x.be(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
-! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<4xi32>) -> !fir.vector<4:ui32>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:ui32>>
-
 ! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]]
 ! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.vsx.lxvw4x.be(ptr %[[addr]])
@@ -1106,14 +698,6 @@ subroutine vec_xlw4_testf32a(arg1, arg2, res)
   vector(real(4)) :: res
   res = vec_xlw4(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.vector<4:f32>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvw4x.be(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
-! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<4xi32> to vector<4xf32>
-! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:f32>>
-
 ! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]]
 ! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.vsx.lxvw4x.be(ptr %[[addr]])
@@ -1132,15 +716,6 @@ subroutine vec_xlds_testi64a(arg1, arg2, res)
   vector(integer(8)) :: res
   res = vec_xlds(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
-! FIR: %[[aryref:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4x!fir.vector<2:i64>>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[aryref]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ref:.*]] = fir.convert %[[addr]] : (!fir.ref<!fir.array<?xi8>>) -> !fir.ref<i64>
-! FIR: %[[val:.*]] = fir.load %[[ref]] : !fir.ref<i64>
-! FIR: %[[vsplt:.*]] = vector.splat %[[val]] : vector<2xi64>
-! FIR: %[[res:.*]] = fir.convert %[[vsplt]] : (vector<2xi64>) -> !fir.vector<2:i64>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<2:i64>>
-
 ! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]]
 ! LLVMIR: %[[ld:.*]] = load i64, ptr %[[addr]], align 8
@@ -1156,16 +731,6 @@ subroutine vec_xlds_testf64a(arg1, arg2, res)
   vector(real(8)) :: res
   res = vec_xlds(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
-! FIR: %[[aryref:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4x!fir.vector<2:f64>>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[aryref]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ref:.*]] = fir.convert %[[addr]] : (!fir.ref<!fir.array<?xi8>>) -> !fir.ref<i64>
-! FIR: %[[val:.*]] = fir.load %[[ref]] : !fir.ref<i64>
-! FIR: %[[vsplt:.*]] = vector.splat %[[val]] : vector<2xi64>
-! FIR: %[[bc:.*]] = vector.bitcast %[[vsplt]] : vector<2xi64> to vector<2xf64>
-! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<2xf64>) -> !fir.vector<2:f64>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<2:f64>>
-
 ! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]]
 ! LLVMIR: %[[ld:.*]] = load i64, ptr %[[addr]], align 8

diff --git a/flang/test/Lower/PowerPC/ppc-vec-load.f90 b/flang/test/Lower/PowerPC/ppc-vec-load.f90
index 1af8cd39c506b76..1da6381905142ce 100644
--- a/flang/test/Lower/PowerPC/ppc-vec-load.f90
+++ b/flang/test/Lower/PowerPC/ppc-vec-load.f90
@@ -1,5 +1,4 @@
-! RUN: bbc -emit-fir %s -o - | FileCheck --check-prefixes="FIR" %s
-! RUN: %flang -emit-llvm -S %s -o - | FileCheck --check-prefixes="LLVMIR" %s
+! RUN: %flang_fc1 -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes="LLVMIR" %s
 ! REQUIRES: target=powerpc{{.*}}
 
 !----------------------
@@ -12,14 +11,6 @@ subroutine vec_ld_testi8(arg1, arg2, res)
   vector(integer(1)) :: arg2, res
   res = vec_ld(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<i8>
-! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref<!fir.vector<16:i8>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i8) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
-! FIR: %[[bc:.*]] = vector.bitcast %[[call]] : vector<4xi32> to vector<16xi8>
-! FIR: %[[cnv:.*]] = fir.convert %[[bc]] : (vector<16xi8>) -> !fir.vector<16:i8>
-! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref<!fir.vector<16:i8>>
-
 ! LLVMIR: %[[arg1:.*]] = load i8, ptr %{{.*}}, align 1
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]]
 ! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %[[addr]])
@@ -33,14 +24,6 @@ subroutine vec_ld_testi16(arg1, arg2, res)
   vector(integer(2)) :: arg2, res
   res = vec_ld(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<i16>
-! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref<!fir.vector<8:i16>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
-! FIR: %[[bc:.*]] = vector.bitcast %[[call]] : vector<4xi32> to vector<8xi16>
-! FIR: %[[cnv:.*]] = fir.convert %[[bc]] : (vector<8xi16>) -> !fir.vector<8:i16>
-! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref<!fir.vector<8:i16>>
-
 ! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]]
 ! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %[[addr]])
@@ -54,13 +37,6 @@ subroutine vec_ld_testi32(arg1, arg2, res)
   vector(integer(4)) :: arg2, res
   res = vec_ld(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<i32>
-! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref<!fir.vector<4:i32>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
-! FIR: %[[cnv:.*]] = fir.convert %[[call]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref<!fir.vector<4:i32>>
-
 ! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]]
 ! LLVMIR: %[[bc:.*]] = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %[[addr]])
@@ -73,15 +49,6 @@ subroutine vec_ld_testf32(arg1, arg2, res)
   vector(real(4)) :: arg2, res
   res = vec_ld(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<i64>
-! FIR: %[[arg1i32:.*]] = fir.convert %[[arg1]] : (i64) -> i32
-! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref<!fir.vector<4:f32>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1i32]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
-! FIR: %[[bc:.*]] = vector.bitcast %[[call]] : vector<4xi32> to vector<4xf32>
-! FIR: %[[cnv:.*]] = fir.convert %[[bc]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref<!fir.vector<4:f32>>
-
 ! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
 ! LLVMIR: %[[arg1i32:.*]] = trunc i64 %[[arg1]] to i32
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1i32]]
@@ -96,13 +63,6 @@ subroutine vec_ld_testu32(arg1, arg2, res)
   vector(unsigned(4)) :: arg2, res
   res = vec_ld(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<i8>
-! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref<!fir.vector<4:ui32>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i8) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
-! FIR: %[[cnv:.*]] = fir.convert %[[call]] : (vector<4xi32>) -> !fir.vector<4:ui32>
-! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref<!fir.vector<4:ui32>>
-
 ! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]]
 ! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %[[addr]])
@@ -116,13 +76,6 @@ subroutine vec_ld_testi32a(arg1, arg2, res)
   vector(integer(4)) :: res
   res = vec_ld(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<i32>
-! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref<!fir.array<10xi32>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
-! FIR: %[[cnv:.*]] = fir.convert %[[call]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref<!fir.vector<4:i32>>
-
 ! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]]
 ! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %[[addr]])
@@ -136,15 +89,6 @@ subroutine vec_ld_testf32av(arg1, arg2, res)
   vector(real(4)) :: res
   res = vec_ld(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<i64>
-! FIR: %[[arg1i32:.*]] = fir.convert %[[arg1]] : (i64) -> i32
-! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref<!fir.array<2x4x8x!fir.vector<4:f32>>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1i32]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
-! FIR: %[[bc:.*]] = vector.bitcast %[[call]] : vector<4xi32> to vector<4xf32>
-! FIR: %[[cnv:.*]] = fir.convert %[[bc]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref<!fir.vector<4:f32>>
-
 ! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
 ! LLVMIR: %[[arg1i32:.*]] = trunc i64 %[[arg1]] to i32
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1i32]]
@@ -160,14 +104,6 @@ subroutine vec_ld_testi32s(arg1, arg2, res)
   vector(real(4)) :: res
   res = vec_ld(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<i32>
-! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref<f32>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
-! FIR: %[[bc:.*]] = vector.bitcast %[[call]] : vector<4xi32> to vector<4xf32>
-! FIR: %[[cnv:.*]] = fir.convert %[[bc]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref<!fir.vector<4:f32>>
-
 ! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]]
 ! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %[[addr]])
@@ -186,13 +122,6 @@ subroutine vec_lde_testi8s(arg1, arg2, res)
   vector(integer(1)) :: res
   res = vec_lde(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i8>
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<i8>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i8) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvebx(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<16xi8>
-! FIR: %[[cnv:.*]] = fir.convert %[[call]] : (vector<16xi8>) -> !fir.vector<16:i8>
-! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref<!fir.vector<16:i8>>
-
 ! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]]
 ! LLVMIR: %[[call:.*]] = call <16 x i8> @llvm.ppc.altivec.lvebx(ptr %[[addr]])
@@ -206,13 +135,6 @@ subroutine vec_lde_testi16a(arg1, arg2, res)
   vector(integer(2)) :: res
   res = vec_lde(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x4x8xi16>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvehx(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<8xi16>
-! FIR: %[[cnv:.*]] = fir.convert %[[call]] : (vector<8xi16>) -> !fir.vector<8:i16>
-! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref<!fir.vector<8:i16>>
-
 ! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]]
 ! LLVMIR: %[[call:.*]] = call <8 x i16> @llvm.ppc.altivec.lvehx(ptr %[[addr]])
@@ -226,13 +148,6 @@ subroutine vec_lde_testi32a(arg1, arg2, res)
   vector(integer(4)) :: res
   res = vec_lde(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i32>
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4xi32>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvewx(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
-! FIR: %[[cnv:.*]] = fir.convert %[[call]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref<!fir.vector<4:i32>>
-
 ! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]]
 ! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvewx(ptr %[[addr]])
@@ -246,14 +161,6 @@ subroutine vec_lde_testf32a(arg1, arg2, res)
   vector(real(4)) :: res
   res = vec_lde(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4xf32>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvewx(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
-! FIR: %[[bc:.*]] = vector.bitcast %[[call]] : vector<4xi32> to vector<4xf32>
-! FIR: %[[cnv:.*]] = fir.convert %[[bc]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref<!fir.vector<4:f32>>
-
 ! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]]
 ! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvewx(ptr %[[addr]])
@@ -271,14 +178,6 @@ subroutine vec_ldl_testi8(arg1, arg2, res)
   vector(integer(1)) :: arg2, res
   res = vec_ldl(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<i8>
-! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref<!fir.vector<16:i8>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i8) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvxl(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
-! FIR: %[[bc:.*]] = vector.bitcast %[[call]] : vector<4xi32> to vector<16xi8>
-! FIR: %[[cnv:.*]] = fir.convert %[[bc]] : (vector<16xi8>) -> !fir.vector<16:i8>
-! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref<!fir.vector<16:i8>>
-
 ! LLVMIR: %[[arg1:.*]] = load i8, ptr %{{.*}}, align 1
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]]
 ! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvxl(ptr %[[addr]])
@@ -292,14 +191,6 @@ subroutine vec_ldl_testi16(arg1, arg2, res)
   vector(integer(2)) :: arg2, res
   res = vec_ldl(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<i16>
-! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref<!fir.vector<8:i16>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvxl(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
-! FIR: %[[bc:.*]] = vector.bitcast %[[call]] : vector<4xi32> to vector<8xi16>
-! FIR: %[[cnv:.*]] = fir.convert %[[bc]] : (vector<8xi16>) -> !fir.vector<8:i16>
-! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref<!fir.vector<8:i16>>
-
 ! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]]
 ! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvxl(ptr %[[addr]])
@@ -313,13 +204,6 @@ subroutine vec_ldl_testi32(arg1, arg2, res)
   vector(integer(4)) :: arg2, res
   res = vec_ldl(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<i32>
-! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref<!fir.vector<4:i32>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvxl(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
-! FIR: %[[cnv:.*]] = fir.convert %[[call]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref<!fir.vector<4:i32>>
-
 ! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]]
 ! LLVMIR: %[[bc:.*]] = call <4 x i32> @llvm.ppc.altivec.lvxl(ptr %[[addr]])
@@ -332,13 +216,6 @@ subroutine vec_ldl_testf32(arg1, arg2, res)
   vector(real(4)) :: arg2, res
   res = vec_ldl(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<i64>
-! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref<!fir.vector<4:f32>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvxl(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
-! FIR: %[[bc:.*]] = vector.bitcast %[[call]] : vector<4xi32> to vector<4xf32>
-! FIR: %[[cnv:.*]] = fir.convert %[[bc]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref<!fir.vector<4:f32>>
 
 ! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]]
@@ -353,13 +230,6 @@ subroutine vec_ldl_testu32(arg1, arg2, res)
   vector(unsigned(4)) :: arg2, res
   res = vec_ldl(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<i8>
-! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref<!fir.vector<4:ui32>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i8) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvxl(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
-! FIR: %[[cnv:.*]] = fir.convert %[[call]] : (vector<4xi32>) -> !fir.vector<4:ui32>
-! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref<!fir.vector<4:ui32>>
-
 ! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]]
 ! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvxl(ptr %[[addr]])
@@ -373,13 +243,6 @@ subroutine vec_ldl_testi32a(arg1, arg2, res)
   vector(integer(4)) :: res
   res = vec_ldl(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<i32>
-! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref<!fir.array<10xi32>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvxl(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
-! FIR: %[[cnv:.*]] = fir.convert %[[call]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref<!fir.vector<4:i32>>
-
 ! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]]
 ! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvxl(ptr %[[addr]])
@@ -393,14 +256,6 @@ subroutine vec_ldl_testf32av(arg1, arg2, res)
   vector(real(4)) :: res
   res = vec_ldl(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<i64>
-! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref<!fir.array<2x4x8x!fir.vector<4:f32>>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvxl(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
-! FIR: %[[bc:.*]] = vector.bitcast %[[call]] : vector<4xi32> to vector<4xf32>
-! FIR: %[[cnv:.*]] = fir.convert %[[bc]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref<!fir.vector<4:f32>>
-
 ! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]]
 ! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvxl(ptr %[[addr]])
@@ -415,14 +270,6 @@ subroutine vec_ldl_testi32s(arg1, arg2, res)
   vector(real(4)) :: res
   res = vec_ldl(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<i32>
-! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref<f32>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvxl(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
-! FIR: %[[bc:.*]] = vector.bitcast %[[call]] : vector<4xi32> to vector<4xf32>
-! FIR: %[[cnv:.*]] = fir.convert %[[bc]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref<!fir.vector<4:f32>>
-
 ! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]]
 ! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvxl(ptr %[[addr]])
@@ -441,19 +288,6 @@ subroutine vec_lvsl_testi8s(arg1, arg2, res)
   vector(unsigned(1)) :: res
   res = vec_lvsl(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i8>
-! FIR: %[[arg1ext:.*]] = fir.convert %[[arg1]] : (i8) -> i64
-! FIR: %[[c56:.*]] = arith.constant 56 : i64
-! FIR: %[[lshft:.*]] = arith.shli %[[arg1ext]], %[[c56]] : i64
-! FIR: %[[rshft:.*]] = arith.shrsi %[[lshft]], %[[c56]] : i64
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<i8>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[rshft]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvsl(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<16xi8>
-! FIR: %[[vundef:.*]] = fir.undefined vector<16xi8>
-! FIR: %[[sv:.*]] = vector.shuffle %[[ld]], %[[vundef]] [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] : vector<16xi8>, vector<16xi8>
-! FIR: %[[res:.*]] = fir.convert %[[sv]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:ui8>>
-
 ! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1
 ! LLVMIR: %[[ext:.*]] = sext i8 %[[arg1]] to i64
 ! LLVMIR: %[[lshft:.*]] = shl i64 %[[ext]], 56
@@ -471,19 +305,6 @@ subroutine vec_lvsl_testi16a(arg1, arg2, res)
   vector(unsigned(1)) :: res
   res = vec_lvsl(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
-! FIR: %[[arg1ext:.*]] = fir.convert %[[arg1]] : (i16) -> i64
-! FIR: %[[c56:.*]] = arith.constant 56 : i64
-! FIR: %[[lshft:.*]] = arith.shli %[[arg1ext]], %[[c56]] : i64
-! FIR: %[[rshft:.*]] = arith.shrsi %[[lshft]], %[[c56]] : i64
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4xi16>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[rshft]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvsl(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<16xi8>
-! FIR: %[[vundef:.*]] = fir.undefined vector<16xi8>
-! FIR: %[[sv:.*]] = vector.shuffle %[[ld]], %[[vundef]] [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] : vector<16xi8>, vector<16xi8>
-! FIR: %[[res:.*]] = fir.convert %[[sv]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:ui8>>
-
 ! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2
 ! LLVMIR: %[[ext:.*]] = sext i16 %[[arg1]] to i64
 ! LLVMIR: %[[lshft:.*]] = shl i64 %[[ext]], 56
@@ -501,19 +322,6 @@ subroutine vec_lvsl_testi32a(arg1, arg2, res)
   vector(unsigned(1)) :: res
   res = vec_lvsl(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i32>
-! FIR: %[[arg1ext:.*]] = fir.convert %[[arg1]] : (i32) -> i64
-! FIR: %[[c56:.*]] = arith.constant 56 : i64
-! FIR: %[[lshft:.*]] = arith.shli %[[arg1ext]], %[[c56]] : i64
-! FIR: %[[rshft:.*]] = arith.shrsi %[[lshft]], %[[c56]] : i64
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x3x4xi32>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[rshft]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvsl(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<16xi8>
-! FIR: %[[vundef:.*]] = fir.undefined vector<16xi8>
-! FIR: %[[sv:.*]] = vector.shuffle %[[ld]], %[[vundef]] [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] : vector<16xi8>, vector<16xi8>
-! FIR: %[[res:.*]] = fir.convert %[[sv]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:ui8>>
-
 ! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4
 ! LLVMIR: %[[ext:.*]] = sext i32 %[[arg1]] to i64
 ! LLVMIR: %[[lshft:.*]] = shl i64 %[[ext]], 56
@@ -531,18 +339,6 @@ subroutine vec_lvsl_testf32a(arg1, arg2, res)
   vector(unsigned(1)) :: res
   res = vec_lvsl(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
-! FIR: %[[c56:.*]] = arith.constant 56 : i64
-! FIR: %[[lshft:.*]] = arith.shli %[[arg1]], %[[c56]] : i64
-! FIR: %[[rshft:.*]] = arith.shrsi %[[lshft]], %[[c56]] : i64
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4xf32>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[rshft]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvsl(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<16xi8>
-! FIR: %[[vundef:.*]] = fir.undefined vector<16xi8>
-! FIR: %[[sv:.*]] = vector.shuffle %[[ld]], %[[vundef]] [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] : vector<16xi8>, vector<16xi8>
-! FIR: %[[res:.*]] = fir.convert %[[sv]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:ui8>>
-
 ! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
 ! LLVMIR: %[[lshft:.*]] = shl i64 %[[arg1]], 56
 ! LLVMIR: %[[rshft:.*]] = ashr i64 %[[lshft]], 56
@@ -563,19 +359,6 @@ subroutine vec_lvsr_testi8s(arg1, arg2, res)
   vector(unsigned(1)) :: res
   res = vec_lvsr(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i8>
-! FIR: %[[arg1ext:.*]] = fir.convert %[[arg1]] : (i8) -> i64
-! FIR: %[[c56:.*]] = arith.constant 56 : i64
-! FIR: %[[lshft:.*]] = arith.shli %[[arg1ext]], %[[c56]] : i64
-! FIR: %[[rshft:.*]] = arith.shrsi %[[lshft]], %[[c56]] : i64
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<i8>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[rshft]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvsr(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<16xi8>
-! FIR: %[[vundef:.*]] = fir.undefined vector<16xi8>
-! FIR: %[[sv:.*]] = vector.shuffle %[[ld]], %[[vundef]] [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] : vector<16xi8>, vector<16xi8>
-! FIR: %[[res:.*]] = fir.convert %[[sv]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:ui8>>
-
 ! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1
 ! LLVMIR: %[[ext:.*]] = sext i8 %[[arg1]] to i64
 ! LLVMIR: %[[lshft:.*]] = shl i64 %[[ext]], 56
@@ -593,19 +376,6 @@ subroutine vec_lvsr_testi16a(arg1, arg2, res)
   vector(unsigned(1)) :: res
   res = vec_lvsr(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
-! FIR: %[[arg1ext:.*]] = fir.convert %[[arg1]] : (i16) -> i64
-! FIR: %[[c56:.*]] = arith.constant 56 : i64
-! FIR: %[[lshft:.*]] = arith.shli %[[arg1ext]], %[[c56]] : i64
-! FIR: %[[rshft:.*]] = arith.shrsi %[[lshft]], %[[c56]] : i64
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4xi16>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[rshft]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvsr(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<16xi8>
-! FIR: %[[vundef:.*]] = fir.undefined vector<16xi8>
-! FIR: %[[sv:.*]] = vector.shuffle %[[ld]], %[[vundef]] [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] : vector<16xi8>, vector<16xi8>
-! FIR: %[[res:.*]] = fir.convert %[[sv]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:ui8>>
-
 ! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2
 ! LLVMIR: %[[ext:.*]] = sext i16 %[[arg1]] to i64
 ! LLVMIR: %[[lshft:.*]] = shl i64 %[[ext]], 56
@@ -623,19 +393,6 @@ subroutine vec_lvsr_testi32a(arg1, arg2, res)
   vector(unsigned(1)) :: res
   res = vec_lvsr(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i32>
-! FIR: %[[arg1ext:.*]] = fir.convert %[[arg1]] : (i32) -> i64
-! FIR: %[[c56:.*]] = arith.constant 56 : i64
-! FIR: %[[lshft:.*]] = arith.shli %[[arg1ext]], %[[c56]] : i64
-! FIR: %[[rshft:.*]] = arith.shrsi %[[lshft]], %[[c56]] : i64
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x3x4xi32>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[rshft]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvsr(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<16xi8>
-! FIR: %[[vundef:.*]] = fir.undefined vector<16xi8>
-! FIR: %[[sv:.*]] = vector.shuffle %[[ld]], %[[vundef]] [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] : vector<16xi8>, vector<16xi8>
-! FIR: %[[res:.*]] = fir.convert %[[sv]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:ui8>>
-
 ! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4
 ! LLVMIR: %[[ext:.*]] = sext i32 %[[arg1]] to i64
 ! LLVMIR: %[[lshft:.*]] = shl i64 %[[ext]], 56
@@ -653,18 +410,6 @@ subroutine vec_lvsr_testf32a(arg1, arg2, res)
   vector(unsigned(1)) :: res
   res = vec_lvsr(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
-! FIR: %[[c56:.*]] = arith.constant 56 : i64
-! FIR: %[[lshft:.*]] = arith.shli %[[arg1]], %[[c56]] : i64
-! FIR: %[[rshft:.*]] = arith.shrsi %[[lshft]], %[[c56]] : i64
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4xf32>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[rshft]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvsr(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<16xi8>
-! FIR: %[[vundef:.*]] = fir.undefined vector<16xi8>
-! FIR: %[[sv:.*]] = vector.shuffle %[[ld]], %[[vundef]] [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] : vector<16xi8>, vector<16xi8>
-! FIR: %[[res:.*]] = fir.convert %[[sv]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:ui8>>
-
 ! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
 ! LLVMIR: %[[lshft:.*]] = shl i64 %[[arg1]], 56
 ! LLVMIR: %[[rshft:.*]] = ashr i64 %[[lshft]], 56
@@ -685,13 +430,6 @@ subroutine vec_lxv_testi8a(arg1, arg2, res)
   vector(integer(1)) :: res
   res = vec_lxv(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i8>
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4xi8>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i8) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
-! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<16xi8>) -> !fir.vector<16:i8>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:i8>>
-
 ! LLVMIR_P9: %[[arg1:.*]] = load i8, ptr %0, align 1
 ! LLVMIR_P9: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]]
 ! LLVMIR_P9: %[[ld:.*]] = load <16 x i8>, ptr %[[addr]], align 1
@@ -705,13 +443,6 @@ subroutine vec_lxv_testi16a(arg1, arg2, res)
   vector(integer(2)) :: res
   res = vec_lxv(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x4x8xi16>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
-! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<8xi16>) -> !fir.vector<8:i16>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<8:i16>>
-
 ! LLVMIR_P9: %[[arg1:.*]] = load i16, ptr %0, align 2
 ! LLVMIR_P9: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]]
 ! LLVMIR_P9: %[[ld:.*]] = load <8 x i16>, ptr %[[addr]], align 1
@@ -725,13 +456,6 @@ subroutine vec_lxv_testi32a(arg1, arg2, res)
   vector(integer(4)) :: res
   res = vec_lxv(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i32>
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x4x8xi32>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
-! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:i32>>
-
 ! LLVMIR_P9: %[[arg1:.*]] = load i32, ptr %0, align 4
 ! LLVMIR_P9: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]]
 ! LLVMIR_P9: %[[ld:.*]] = load <4 x i32>, ptr %[[addr]], align 1
@@ -745,13 +469,6 @@ subroutine vec_lxv_testf32a(arg1, arg2, res)
   vector(real(4)) :: res
   res = vec_lxv(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4xf32>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
-! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:f32>>
-
 ! LLVMIR_P9: %[[arg1:.*]] = load i16, ptr %0, align 2
 ! LLVMIR_P9: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]]
 ! LLVMIR_P9: %[[ld:.*]] = load <4 x float>, ptr %[[addr]], align 1
@@ -765,13 +482,6 @@ subroutine vec_lxv_testf64a(arg1, arg2, res)
   vector(real(8)) :: res
   res = vec_lxv(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4xf64>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
-! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<2xf64>) -> !fir.vector<2:f64>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<2:f64>>
-
 ! LLVMIR_P9: %[[arg1:.*]] = load i64, ptr %0, align 8
 ! LLVMIR_P9: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]]
 ! LLVMIR_P9: %[[ld:.*]] = load <2 x double>, ptr %[[addr]], align 1
@@ -789,14 +499,6 @@ subroutine vec_xld2_testi8a(arg1, arg2, res)
   vector(integer(1)) :: res
   res = vec_xld2(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i8>
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4x!fir.vector<16:i8>>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i8) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvd2x(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<2xf64>
-! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<2xf64> to vector<16xi8>
-! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<16xi8>) -> !fir.vector<16:i8>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:i8>>
-
 ! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]]
 ! LLVMIR: %[[ld:.*]] = call contract <2 x double> @llvm.ppc.vsx.lxvd2x(ptr %[[addr]])
@@ -811,14 +513,6 @@ subroutine vec_xld2_testi16(arg1, arg2, res)
   vector(integer(2)) :: res
   res = vec_xld2(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i32>
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.vector<8:i16>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvd2x(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<2xf64>
-! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<2xf64> to vector<8xi16>
-! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<8xi16>) -> !fir.vector<8:i16>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<8:i16>>
-
 ! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]]
 ! LLVMIR: %[[ld:.*]] = call contract <2 x double> @llvm.ppc.vsx.lxvd2x(ptr %[[addr]])
@@ -833,14 +527,6 @@ subroutine vec_xld2_testi32a(arg1, arg2, res)
   vector(integer(4)) :: res
   res = vec_xld2(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i32>
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<41x!fir.vector<4:i32>>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvd2x(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<2xf64>
-! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<2xf64> to vector<4xi32>
-! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:i32>>
-
 ! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]]
 ! LLVMIR: %[[ld:.*]] = call contract <2 x double> @llvm.ppc.vsx.lxvd2x(ptr %[[addr]])
@@ -855,14 +541,6 @@ subroutine vec_xld2_testi64a(arg1, arg2, res)
   vector(integer(8)) :: res
   res = vec_xld2(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4x!fir.vector<2:i64>>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvd2x(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<2xf64>
-! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<2xf64> to vector<2xi64>
-! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<2xi64>) -> !fir.vector<2:i64>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<2:i64>>
-
 ! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]]
 ! LLVMIR: %[[ld:.*]] = call contract <2 x double> @llvm.ppc.vsx.lxvd2x(ptr %[[addr]])
@@ -877,14 +555,6 @@ subroutine vec_xld2_testf32a(arg1, arg2, res)
   vector(real(4)) :: res
   res = vec_xld2(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4x!fir.vector<4:f32>>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvd2x(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<2xf64>
-! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<2xf64> to vector<4xf32>
-! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:f32>>
-
 ! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]]
 ! LLVMIR: %[[ld:.*]] = call contract <2 x double> @llvm.ppc.vsx.lxvd2x(ptr %[[addr]])
@@ -899,13 +569,6 @@ subroutine vec_xld2_testf64a(arg1, arg2, res)
   vector(real(8)) :: res
   res = vec_xld2(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4x!fir.vector<2:f64>>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvd2x(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<2xf64>
-! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<2xf64>) -> !fir.vector<2:f64>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<2:f64>>
-
 ! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]]
 ! LLVMIR: %[[ld:.*]] = call contract <2 x double> @llvm.ppc.vsx.lxvd2x(ptr %[[addr]])
@@ -923,13 +586,6 @@ subroutine vec_xl_testi8a(arg1, arg2, res)
   vector(integer(1)) :: res
   res = vec_xl(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i8>
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4xi8>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i8) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
-! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<16xi8>) -> !fir.vector<16:i8>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:i8>>
-
 ! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]]
 ! LLVMIR: %[[ld:.*]] = load <16 x i8>, ptr %[[addr]], align 1
@@ -943,13 +599,6 @@ subroutine vec_xl_testi16a(arg1, arg2, res)
   vector(integer(2)) :: res
   res = vec_xl(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x4x8xi16>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
-! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<8xi16>) -> !fir.vector<8:i16>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<8:i16>>
-
 ! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]]
 ! LLVMIR: %[[ld:.*]] = load <8 x i16>, ptr %[[addr]], align 1
@@ -963,13 +612,6 @@ subroutine vec_xl_testi32a(arg1, arg2, res)
   vector(integer(4)) :: res
   res = vec_xl(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i32>
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x4x8xi32>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvw4x(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
-! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:i32>>
-
 ! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]]
 ! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.vsx.lxvw4x(ptr %[[addr]])
@@ -983,14 +625,6 @@ subroutine vec_xl_testi64a(arg1, arg2, res)
   vector(integer(8)) :: res
   res = vec_xl(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x4x8xi64>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvd2x(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<2xf64>
-! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<2xf64> to vector<2xi64>
-! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<2xi64>) -> !fir.vector<2:i64>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<2:i64>>
-
 ! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]]
 ! LLVMIR: %[[ld:.*]] = call contract <2 x double> @llvm.ppc.vsx.lxvd2x(ptr %[[addr]])
@@ -1005,14 +639,6 @@ subroutine vec_xl_testf32a(arg1, arg2, res)
   vector(real(4)) :: res
   res = vec_xl(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4xf32>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvw4x(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
-! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<4xi32> to vector<4xf32>
-! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:f32>>
-
 ! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]]
 ! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.vsx.lxvw4x(ptr %[[addr]])
@@ -1027,13 +653,6 @@ subroutine vec_xl_testf64a(arg1, arg2, res)
   vector(real(8)) :: res
   res = vec_xl(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<f64>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvd2x(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<2xf64>
-! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<2xf64>) -> !fir.vector<2:f64>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<2:f64>>
-
 ! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]]
 ! LLVMIR: %[[ld:.*]] = call contract <2 x double> @llvm.ppc.vsx.lxvd2x(ptr %[[addr]])
@@ -1051,15 +670,6 @@ subroutine vec_xlds_testi64a(arg1, arg2, res)
   vector(integer(8)) :: res
   res = vec_xlds(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4x!fir.vector<2:i64>>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[cnv:.*]] = fir.convert %[[addr]] : (!fir.ref<!fir.array<?xi8>>) -> !fir.ref<i64>
-! FIR: %[[ld:.*]] = fir.load %[[cnv]] : !fir.ref<i64>
-! FIR: %[[vsplt:.*]] = vector.splat %[[ld]] : vector<2xi64>
-! FIR: %[[res:.*]] = fir.convert %[[vsplt]] : (vector<2xi64>) -> !fir.vector<2:i64>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<2:i64>>
-
 ! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]]
 ! LLVMIR: %[[ld:.*]] = load i64, ptr %[[addr]], align 8
@@ -1075,16 +685,6 @@ subroutine vec_xlds_testf64a(arg1, arg2, res)
   vector(real(8)) :: res
   res = vec_xlds(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4x!fir.vector<2:f64>>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[cnv:.*]] = fir.convert %[[addr]] : (!fir.ref<!fir.array<?xi8>>) -> !fir.ref<i64>
-! FIR: %[[ld:.*]] = fir.load %[[cnv]] : !fir.ref<i64>
-! FIR: %[[vsplt:.*]] = vector.splat %[[ld]] : vector<2xi64>
-! FIR: %[[bc:.*]] = vector.bitcast %[[vsplt]] : vector<2xi64> to vector<2xf64>
-! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<2xf64>) -> !fir.vector<2:f64>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<2:f64>>
-
 ! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]]
 ! LLVMIR: %[[ld:.*]] = load i64, ptr %[[addr]], align 8
@@ -1105,15 +705,6 @@ subroutine vec_xl_be_testi8a(arg1, arg2, res)
   vector(integer(1)) :: res
   res = vec_xl_be(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i8>
-! FIR: %[[uarr:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x4x8xi8>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[uarr]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i8) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
-! FIR: %[[uv:.*]] = fir.undefined vector<16xi8>
-! FIR: %[[shff:.*]] = vector.shuffle %[[ld]], %[[uv]] [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] : vector<16xi8>, vector<16xi8>
-! FIR: %[[res:.*]] = fir.convert %[[shff]] : (vector<16xi8>) -> !fir.vector<16:i8>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:i8>>
-
 ! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]]
 ! LLVMIR: %[[ld:.*]] = load <16 x i8>, ptr %[[addr]], align 1
@@ -1128,15 +719,6 @@ subroutine vec_xl_be_testi16a(arg1, arg2, res)
   vector(integer(2)) :: res
   res = vec_xl_be(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
-! FIR: %[[uarr:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x4x8xi16>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[uarr]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
-! FIR: %[[uv:.*]] = fir.undefined vector<8xi16>
-! FIR: %[[shff:.*]] = vector.shuffle %[[ld]], %[[uv]] [7, 6, 5, 4, 3, 2, 1, 0] : vector<8xi16>, vector<8xi16>
-! FIR: %[[res:.*]] = fir.convert %[[shff]] : (vector<8xi16>) -> !fir.vector<8:i16>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<8:i16>>
-
 ! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]]
 ! LLVMIR: %[[ld:.*]] = load <8 x i16>, ptr %[[addr]], align 1
@@ -1151,15 +733,6 @@ subroutine vec_xl_be_testi32a(arg1, arg2, res)
   vector(integer(4)) :: res
   res = vec_xl_be(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i32>
-! FIR: %[[uarr:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x4x8xi32>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[uarr]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
-! FIR: %[[uv:.*]] = fir.undefined vector<4xi32>
-! FIR: %[[shff:.*]] = vector.shuffle %[[ld]], %[[uv]] [3, 2, 1, 0] : vector<4xi32>, vector<4xi32>
-! FIR: %[[res:.*]] = fir.convert %[[shff]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:i32>>
-
 ! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]]
 ! LLVMIR:  %[[ld:.*]] = load <4 x i32>, ptr %[[addr]], align 1
@@ -1174,15 +747,6 @@ subroutine vec_xl_be_testi64a(arg1, arg2, res)
   vector(integer(8)) :: res
   res = vec_xl_be(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
-! FIR: %[[uarr:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x4x8xi64>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[uarr]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
-! FIR: %[[uv:.*]] = fir.undefined vector<2xi64>
-! FIR: %[[shff:.*]] = vector.shuffle %[[ld]], %[[uv]] [1, 0] : vector<2xi64>, vector<2xi64>
-! FIR: %[[res:.*]] = fir.convert %[[shff]] : (vector<2xi64>) -> !fir.vector<2:i64>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<2:i64>>
-
 ! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]]
 ! LLVMIR:  %[[ld:.*]] = load <2 x i64>, ptr %[[addr]], align 1
@@ -1197,15 +761,6 @@ subroutine vec_xl_be_testf32a(arg1, arg2, res)
   vector(real(4)) :: res
   res = vec_xl_be(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
-! FIR: %[[uarr:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4xf32>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[uarr]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
-! FIR: %[[uv:.*]] = fir.undefined vector<4xf32>
-! FIR: %[[shff:.*]] = vector.shuffle %[[ld]], %[[uv]] [3, 2, 1, 0] : vector<4xf32>, vector<4xf32>
-! FIR: %[[res:.*]] = fir.convert %[[shff]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:f32>>
-
 ! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]]
 ! LLVMIR:  %[[ld:.*]] = load <4 x float>, ptr %[[addr]], align 1
@@ -1220,15 +775,6 @@ subroutine vec_xl_be_testf64a(arg1, arg2, res)
   vector(real(8)) :: res
   res = vec_xl_be(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
-! FIR: %[[uarr:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<7xf64>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[uarr]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
-! FIR: %[[uv:.*]] = fir.undefined vector<2xf64>
-! FIR: %[[shff:.*]] = vector.shuffle %[[ld]], %[[uv]] [1, 0] : vector<2xf64>, vector<2xf64>
-! FIR: %[[res:.*]] = fir.convert %[[shff]] : (vector<2xf64>) -> !fir.vector<2:f64>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<2:f64>>
-
 ! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]]
 ! LLVMIR:  %[[ld:.*]] = load <2 x double>, ptr %[[addr]], align 1
@@ -1247,14 +793,6 @@ subroutine vec_xlw4_testi8a(arg1, arg2, res)
   vector(integer(1)) :: res
   res = vec_xlw4(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i8>
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x4x8x!fir.vector<16:i8>>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i8) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvw4x(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
-! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<4xi32> to vector<16xi8>
-! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<16xi8>) -> !fir.vector<16:i8>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:i8>>
-
 ! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]]
 ! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.vsx.lxvw4x(ptr %[[addr]])
@@ -1269,14 +807,6 @@ subroutine vec_xlw4_testi16a(arg1, arg2, res)
   vector(integer(2)) :: res
   res = vec_xlw4(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x4x8x!fir.vector<8:i16>>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvw4x(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
-! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<4xi32> to vector<8xi16>
-! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<8xi16>) -> !fir.vector<8:i16>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<8:i16>>
-
 ! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]]
 ! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.vsx.lxvw4x(ptr %[[addr]])
@@ -1291,13 +821,6 @@ subroutine vec_xlw4_testu32a(arg1, arg2, res)
   vector(unsigned(4)) :: res
   res = vec_xlw4(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i32>
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x4x8x!fir.vector<4:ui32>>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvw4x(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
-! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<4xi32>) -> !fir.vector<4:ui32>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:ui32>>
-
 ! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]]
 ! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.vsx.lxvw4x(ptr %[[addr]])
@@ -1311,14 +834,6 @@ subroutine vec_xlw4_testf32a(arg1, arg2, res)
   vector(real(4)) :: res
   res = vec_xlw4(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
-! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4x!fir.vector<4:f32>>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvw4x(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
-! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<4xi32> to vector<4xf32>
-! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:f32>>
-
 ! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]]
 ! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.vsx.lxvw4x(ptr %[[addr]])
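
Every file above follows the same recipe: the FIR RUN lines and their "! FIR:" blocks are deleted, and a single HLFIR RUN line drives the surviving "! LLVMIR:" checks. For reference, a minimal sketch of a migrated test in that shape, modeled on vec_xl_testi8a above (the subroutine name and the exact check operands here are illustrative, not part of the patch):

! RUN: %flang_fc1 -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes="LLVMIR" %s
! REQUIRES: target=powerpc{{.*}}
subroutine vec_xl_sketch(arg1, arg2, res)
  integer(1) :: arg1
  integer(1) :: arg2(4)
  vector(integer(1)) :: res
  res = vec_xl(arg1, arg2)
! LLVMIR: %[[off:.*]] = load i8, ptr %0, align 1
! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[off]]
! LLVMIR: %[[ld:.*]] = load <16 x i8>, ptr %[[addr]], align 1
! LLVMIR: store <16 x i8> %[[ld]], ptr %2, align 16
end subroutine vec_xl_sketch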

diff --git a/flang/test/Lower/PowerPC/ppc-vec-max-min-madd-nmsub.f90 b/flang/test/Lower/PowerPC/ppc-vec-max-min-madd-nmsub.f90
new file mode 100644
index 000000000000000..2b2542954636732
--- /dev/null
+++ b/flang/test/Lower/PowerPC/ppc-vec-max-min-madd-nmsub.f90
@@ -0,0 +1,334 @@
+! RUN: %flang_fc1 -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes="LLVMIR" %s
+! REQUIRES: target=powerpc{{.*}}
+
+! vec_max
+
+! CHECK-LABEL: vec_max_testf32
+subroutine vec_max_testf32(x, y)
+  vector(real(4)) :: vmax, x, y
+  vmax = vec_max(x, y)
+
+! LLVMIR: %[[x:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[y:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[vmax:.*]] = call contract <4 x float> @llvm.ppc.vsx.xvmaxsp(<4 x float> %[[x]], <4 x float> %[[y]])
+! LLVMIR: store <4 x float> %[[vmax]], ptr %{{[0-9]}}, align 16
+end subroutine vec_max_testf32
+
+! CHECK-LABEL: vec_max_testf64
+subroutine vec_max_testf64(x, y)
+  vector(real(8)) :: vmax, x, y
+  vmax = vec_max(x, y)
+
+! LLVMIR: %[[x:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[y:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[vmax:.*]] = call contract <2 x double> @llvm.ppc.vsx.xvmaxdp(<2 x double> %[[x]], <2 x double> %[[y]])
+! LLVMIR: store <2 x double> %[[vmax]], ptr %{{[0-9]}}, align 16
+end subroutine vec_max_testf64
+
+! CHECK-LABEL: vec_max_testi8
+subroutine vec_max_testi8(x, y)
+  vector(integer(1)) :: vmax, x, y
+  vmax = vec_max(x, y)
+
+! LLVMIR: %[[x:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[y:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[vmax:.*]] = call <16 x i8> @llvm.ppc.altivec.vmaxsb(<16 x i8> %[[x]], <16 x i8> %[[y]])
+! LLVMIR: store <16 x i8> %[[vmax]], ptr %{{[0-9]}}, align 16
+end subroutine vec_max_testi8
+
+! CHECK-LABEL: vec_max_testi16
+subroutine vec_max_testi16(x, y)
+  vector(integer(2)) :: vmax, x, y
+  vmax = vec_max(x, y)
+
+! LLVMIR: %[[x:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[y:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[vmax:.*]] = call <8 x i16> @llvm.ppc.altivec.vmaxsh(<8 x i16> %[[x]], <8 x i16> %[[y]])
+! LLVMIR: store <8 x i16> %[[vmax]], ptr %{{[0-9]}}, align 16
+end subroutine vec_max_testi16
+
+! CHECK-LABEL: vec_max_testi32
+subroutine vec_max_testi32(x, y)
+  vector(integer(4)) :: vmax, x, y
+  vmax = vec_max(x, y)
+
+! LLVMIR: %[[x:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[y:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[vmax:.*]] = call <4 x i32> @llvm.ppc.altivec.vmaxsw(<4 x i32> %[[x]], <4 x i32> %[[y]])
+! LLVMIR: store <4 x i32> %[[vmax]], ptr %{{[0-9]}}, align 16
+end subroutine vec_max_testi32
+
+! CHECK-LABEL: vec_max_testi64
+subroutine vec_max_testi64(x, y)
+  vector(integer(8)) :: vmax, x, y
+  vmax = vec_max(x, y)
+
+! LLVMIR: %[[x:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[y:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[vmax:.*]] = call <2 x i64> @llvm.ppc.altivec.vmaxsd(<2 x i64> %[[x]], <2 x i64> %[[y]])
+! LLVMIR: store <2 x i64> %[[vmax]], ptr %{{[0-9]}}, align 16
+end subroutine vec_max_testi64
+
+! CHECK-LABEL: vec_max_testui8
+subroutine vec_max_testui8(x, y)
+  vector(unsigned(1)) :: vmax, x, y
+  vmax = vec_max(x, y)
+
+! LLVMIR: %[[x:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[y:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[vmax:.*]] = call <16 x i8> @llvm.ppc.altivec.vmaxub(<16 x i8> %[[x]], <16 x i8> %[[y]])
+! LLVMIR: store <16 x i8> %[[vmax]], ptr %{{[0-9]}}, align 16
+end subroutine vec_max_testui8
+
+! CHECK-LABEL: vec_max_testui16
+subroutine vec_max_testui16(x, y)
+  vector(unsigned(2)) :: vmax, x, y
+  vmax = vec_max(x, y)
+
+! LLVMIR: %[[x:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[y:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[vmax:.*]] = call <8 x i16> @llvm.ppc.altivec.vmaxuh(<8 x i16> %[[x]], <8 x i16> %[[y]])
+! LLVMIR: store <8 x i16> %[[vmax]], ptr %{{[0-9]}}, align 16
+end subroutine vec_max_testui16
+
+! CHECK-LABEL: vec_max_testui32
+subroutine vec_max_testui32(x, y)
+  vector(unsigned(4)) :: vmax, x, y
+  vmax = vec_max(x, y)
+
+! LLVMIR: %[[x:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[y:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[vmax:.*]] = call <4 x i32> @llvm.ppc.altivec.vmaxuw(<4 x i32> %[[x]], <4 x i32> %[[y]])
+! LLVMIR: store <4 x i32> %[[vmax]], ptr %{{[0-9]}}, align 16
+end subroutine vec_max_testui32
+
+! CHECK-LABEL: vec_max_testui64
+subroutine vec_max_testui64(x, y)
+  vector(unsigned(8)) :: vmax, x, y
+  vmax = vec_max(x, y)
+
+! LLVMIR: %[[x:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[y:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[vmax:.*]] = call <2 x i64> @llvm.ppc.altivec.vmaxud(<2 x i64> %[[x]], <2 x i64> %[[y]])
+! LLVMIR: store <2 x i64> %[[vmax]], ptr %{{[0-9]}}, align 16
+end subroutine vec_max_testui64
+
+! vec_min
+
+! CHECK-LABEL: vec_min_testf32
+subroutine vec_min_testf32(x, y)
+  vector(real(4)) :: vmin, x, y
+  vmin = vec_min(x, y)
+
+! LLVMIR: %[[x:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[y:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[vmin:.*]] = call contract <4 x float> @llvm.ppc.vsx.xvminsp(<4 x float> %[[x]], <4 x float> %[[y]])
+! LLVMIR: store <4 x float> %[[vmin]], ptr %{{[0-9]}}, align 16
+end subroutine vec_min_testf32
+
+! CHECK-LABEL: vec_min_testf64
+subroutine vec_min_testf64(x, y)
+  vector(real(8)) :: vmin, x, y
+  vmin = vec_min(x, y)
+
+! LLVMIR: %[[x:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[y:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[vmin:.*]] = call contract <2 x double> @llvm.ppc.vsx.xvmindp(<2 x double> %[[x]], <2 x double> %[[y]])
+! LLVMIR: store <2 x double> %[[vmin]], ptr %{{[0-9]}}, align 16
+end subroutine vec_min_testf64
+
+! CHECK-LABEL: vec_min_testi8
+subroutine vec_min_testi8(x, y)
+  vector(integer(1)) :: vmin, x, y
+  vmin = vec_min(x, y)
+
+! LLVMIR: %[[x:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[y:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[vmin:.*]] = call <16 x i8> @llvm.ppc.altivec.vminsb(<16 x i8> %[[x]], <16 x i8> %[[y]])
+! LLVMIR: store <16 x i8> %[[vmin]], ptr %{{[0-9]}}, align 16
+end subroutine vec_min_testi8
+
+! CHECK-LABEL: vec_min_testi16
+subroutine vec_min_testi16(x, y)
+  vector(integer(2)) :: vmin, x, y
+  vmin = vec_min(x, y)
+
+! LLVMIR: %[[x:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[y:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[vmin:.*]] = call <8 x i16> @llvm.ppc.altivec.vminsh(<8 x i16> %[[x]], <8 x i16> %[[y]])
+! LLVMIR: store <8 x i16> %[[vmin]], ptr %{{[0-9]}}, align 16
+end subroutine vec_min_testi16
+
+! CHECK-LABEL: vec_min_testi32
+subroutine vec_min_testi32(x, y)
+  vector(integer(4)) :: vmin, x, y
+  vmin = vec_min(x, y)
+
+! LLVMIR: %[[x:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[y:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[vmin:.*]] = call <4 x i32> @llvm.ppc.altivec.vminsw(<4 x i32> %[[x]], <4 x i32> %[[y]])
+! LLVMIR: store <4 x i32> %[[vmin]], ptr %{{[0-9]}}, align 16
+end subroutine vec_min_testi32
+
+! CHECK-LABEL: vec_min_testi64
+subroutine vec_min_testi64(x, y)
+  vector(integer(8)) :: vmin, x, y
+  vmin = vec_min(x, y)
+
+! LLVMIR: %[[x:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[y:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[vmin:.*]] = call <2 x i64> @llvm.ppc.altivec.vminsd(<2 x i64> %[[x]], <2 x i64> %[[y]])
+! LLVMIR: store <2 x i64> %[[vmin]], ptr %{{[0-9]}}, align 16
+end subroutine vec_min_testi64
+
+! CHECK-LABEL: vec_min_testui8
+subroutine vec_min_testui8(x, y)
+  vector(unsigned(1)) :: vmin, x, y
+  vmin = vec_min(x, y)
+
+! LLVMIR: %[[x:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[y:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[vmin:.*]] = call <16 x i8> @llvm.ppc.altivec.vminub(<16 x i8> %[[x]], <16 x i8> %[[y]])
+! LLVMIR: store <16 x i8> %[[vmin]], ptr %{{[0-9]}}, align 16
+end subroutine vec_min_testui8
+
+! CHECK-LABEL: vec_min_testui16
+subroutine vec_min_testui16(x, y)
+  vector(unsigned(2)) :: vmin, x, y
+  vmin = vec_min(x, y)
+
+! LLVMIR: %[[x:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[y:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[vmin:.*]] = call <8 x i16> @llvm.ppc.altivec.vminuh(<8 x i16> %[[x]], <8 x i16> %[[y]])
+! LLVMIR: store <8 x i16> %[[vmin]], ptr %{{[0-9]}}, align 16
+end subroutine vec_min_testui16
+
+! CHECK-LABEL: vec_min_testui32
+subroutine vec_min_testui32(x, y)
+  vector(unsigned(4)) :: vmin, x, y
+  vmin = vec_min(x, y)
+
+! LLVMIR: %[[x:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[y:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[vmin:.*]] = call <4 x i32> @llvm.ppc.altivec.vminuw(<4 x i32> %[[x]], <4 x i32> %[[y]])
+! LLVMIR: store <4 x i32> %[[vmin]], ptr %{{[0-9]}}, align 16
+end subroutine vec_min_testui32
+
+! CHECK-LABEL: vec_min_testui64
+subroutine vec_min_testui64(x, y)
+  vector(unsigned(8)) :: vmin, x, y
+  vmin = vec_min(x, y)
+
+! LLVMIR: %[[x:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[y:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[vmin:.*]] = call <2 x i64> @llvm.ppc.altivec.vminud(<2 x i64> %[[x]], <2 x i64> %[[y]])
+! LLVMIR: store <2 x i64> %[[vmin]], ptr %{{[0-9]}}, align 16
+end subroutine vec_min_testui64
+
+! vec_madd
+
+! CHECK-LABEL: vec_madd_testf32
+subroutine vec_madd_testf32(x, y, z)
+  vector(real(4)) :: vmsum, x, y, z
+  vmsum = vec_madd(x, y, z)
+
+! LLVMIR: %[[x:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[y:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[z:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[vmsum:.*]] = call contract <4 x float> @llvm.fma.v4f32(<4 x float> %[[x]], <4 x float> %[[y]], <4 x float> %[[z]])
+! LLVMIR: store <4 x float> %[[vmsum]], ptr %{{[0-9]}}, align 16
+end subroutine vec_madd_testf32
+
+! CHECK-LABEL: vec_madd_testf64
+subroutine vec_madd_testf64(x, y, z)
+  vector(real(8)) :: vmsum, x, y, z
+  vmsum = vec_madd(x, y, z)
+
+! LLVMIR: %[[x:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[y:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[z:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[vmsum:.*]] = call contract <2 x double> @llvm.fma.v2f64(<2 x double> %[[x]], <2 x double> %[[y]], <2 x double> %[[z]])
+! LLVMIR: store <2 x double> %[[vmsum]], ptr %{{[0-9]}}, align 16
+end subroutine vec_madd_testf64
+
+! vec_nmsub
+
+! CHECK-LABEL: vec_nmsub_testf32
+subroutine vec_nmsub_testf32(x, y, z)
+  vector(real(4)) :: vnmsub, x, y, z
+  vnmsub = vec_nmsub(x, y, z)
+
+! LLVMIR: %[[x:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[y:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[z:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[vnmsub:.*]] = call contract <4 x float> @llvm.ppc.fnmsub.v4f32(<4 x float> %[[x]], <4 x float> %[[y]], <4 x float> %[[z]])
+! LLVMIR: store <4 x float> %[[vnmsub]], ptr %{{[0-9]}}, align 16
+end subroutine vec_nmsub_testf32
+
+! CHECK-LABEL: vec_nmsub_testf64
+subroutine vec_nmsub_testf64(x, y, z)
+  vector(real(8)) :: vnmsub, x, y, z
+  vnmsub = vec_nmsub(x, y, z)
+
+! LLVMIR: %[[x:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[y:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[z:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[vnmsub:.*]] = call contract <2 x double> @llvm.ppc.fnmsub.v2f64(<2 x double> %[[x]], <2 x double> %[[y]], <2 x double> %[[z]])
+! LLVMIR: store <2 x double> %[[vnmsub]], ptr %{{[0-9]}}, align 16
+end subroutine vec_nmsub_testf64
+
+! vec_msub
+
+! CHECK-LABEL: vec_msub_testf32
+subroutine vec_msub_testf32(x, y, z)
+  vector(real(4)) :: vmsub, x, y, z
+  vmsub = vec_msub(x, y, z)
+
+! LLVMIR: %[[x:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[y:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[z:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[nz:.*]] = fneg contract <4 x float> %[[z]]
+! LLVMIR: %[[vmsub:.*]] = call contract <4 x float> @llvm.fma.v4f32(<4 x float> %[[x]], <4 x float> %[[y]], <4 x float> %[[nz]])
+! LLVMIR: store <4 x float> %[[vmsub]], ptr %{{[0-9]}}, align 16
+end subroutine vec_msub_testf32
+
+! CHECK-LABEL: vec_msub_testf64
+subroutine vec_msub_testf64(x, y, z)
+  vector(real(8)) :: vmsub, x, y, z
+  vmsub = vec_msub(x, y, z)
+
+! LLVMIR: %[[x:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[y:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[z:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[nz:.*]] = fneg contract <2 x double> %[[z]]
+! LLVMIR: %[[vmsub:.*]] = call contract <2 x double> @llvm.fma.v2f64(<2 x double> %[[x]], <2 x double> %[[y]], <2 x double> %[[nz]])
+! LLVMIR: store <2 x double> %[[vmsub]], ptr %{{[0-9]}}, align 16
+end subroutine vec_msub_testf64
+
+! vec_nmadd
+
+! CHECK-LABEL: vec_nmadd_testf32
+subroutine vec_nmadd_testf32(x, y, z)
+  vector(real(4)) :: vnmsum, x, y, z
+  vnmsum = vec_nmadd(x, y, z)
+
+! LLVMIR: %[[x:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[y:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[z:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[msum:.*]] = call contract <4 x float> @llvm.fma.v4f32(<4 x float> %[[x]], <4 x float> %[[y]], <4 x float> %[[z]])
+! LLVMIR: %[[vnmsum:.*]] = fneg contract <4 x float> %[[msum]]
+! LLVMIR: store <4 x float> %[[vnmsum]], ptr %{{[0-9]}}, align 16
+end subroutine vec_nmadd_testf32
+
+! CHECK-LABEL: vec_nmadd_testf64
+subroutine vec_nmadd_testf64(x, y, z)
+  vector(real(8)) :: vnmsum, x, y, z
+  vnmsum = vec_nmadd(x, y, z)
+
+! LLVMIR: %[[x:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[y:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[z:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
+! LLVMIR: %[[msum:.*]] = call contract <2 x double> @llvm.fma.v2f64(<2 x double> %[[x]], <2 x double> %[[y]], <2 x double> %[[z]])
+! LLVMIR: %[[vnmsum:.*]] = fneg contract <2 x double> %[[msum]]
+! LLVMIR: store <2 x double> %[[vnmsum]], ptr %{{[0-9]}}, align 16
+end subroutine vec_nmadd_testf64
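
Taken together, the checks in this new file pin down the lowering of the multiply-add family. Restated as a sketch (real(4) variants; the trailing comments give the mapping and are not literal check lines):

  vmsum  = vec_madd(x, y, z)   ! -> @llvm.fma.v4f32(x, y, z)
  vmsub  = vec_msub(x, y, z)   ! -> @llvm.fma.v4f32(x, y, fneg z)
  vnmsum = vec_nmadd(x, y, z)  ! -> fneg of @llvm.fma.v4f32(x, y, z)
  vnmsub = vec_nmsub(x, y, z)  ! -> @llvm.ppc.fnmsub.v4f32(x, y, z)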

diff --git a/flang/test/Lower/PowerPC/ppc-vec-merge-elem-order.f90 b/flang/test/Lower/PowerPC/ppc-vec-merge-elem-order.f90
index fa0e0c62a608ea8..07cdfa13ef9ee7e 100644
--- a/flang/test/Lower/PowerPC/ppc-vec-merge-elem-order.f90
+++ b/flang/test/Lower/PowerPC/ppc-vec-merge-elem-order.f90
@@ -1,5 +1,4 @@
-! RUN: %flang_fc1 -emit-fir %s -fno-ppc-native-vector-element-order -triple ppc64le-unknown-linux -o - | FileCheck --check-prefixes="FIR" %s
-! RUN: %flang_fc1 -emit-llvm %s -fno-ppc-native-vector-element-order -triple ppc64le-unknown-linux -o - | FileCheck --check-prefixes="LLVMIR" %s
+! RUN: %flang_fc1 -flang-experimental-hlfir -emit-llvm %s -fno-ppc-native-vector-element-order -triple ppc64le-unknown-linux -o - | FileCheck --check-prefixes="LLVMIR" %s
 ! REQUIRES: target=powerpc{{.*}}
 
 !-----------------
@@ -11,14 +10,6 @@ subroutine vec_mergeh_test_i4(arg1, arg2)
   vector(integer(4)) :: arg1, arg2, r
   r = vec_mergeh(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! FIR: %[[r:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [6, 2, 7, 3] : vector<4xi32>, vector<4xi32>
-! FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-
 ! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
 ! LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
 ! LLVMIR: %[[r:.*]] = shufflevector <4 x i32> %[[arg1]], <4 x i32> %[[arg2]], <4 x i32> <i32 6, i32 2, i32 7, i32 3>
@@ -34,14 +25,6 @@ subroutine vec_mergel_test_r8(arg1, arg2)
   vector(real(8)) :: arg1, arg2, r
   r = vec_mergel(arg1, arg2)
 
-! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! FIR: %[[r:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [2, 0] : vector<2xf64>, vector<2xf64>
-! FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<2xf64>) -> !fir.vector<2:f64>
-! FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-
 ! LLVMIR: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16
 ! LLVMIR: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16
 ! LLVMIR: %[[r:.*]] = shufflevector <2 x double> %[[arg1]], <2 x double> %[[arg2]], <2 x i32> <i32 2, i32 0>
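
The element order shows up directly in the shuffle masks. For the same vec_mergeh(arg1, arg2) on vector(integer(4)), compare (both check lines taken from this patch; the parenthetical notes are annotations):

! LLVMIR: %[[r:.*]] = shufflevector <4 x i32> %[[arg1]], <4 x i32> %[[arg2]], <4 x i32> <i32 6, i32 2, i32 7, i32 3>   (with -fno-ppc-native-vector-element-order)
! LLVMIR: %[[r:.*]] = shufflevector <4 x i32> %[[arg1]], <4 x i32> %[[arg2]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>   (native element order, ppc-vec-merge.f90 below)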

diff --git a/flang/test/Lower/PowerPC/ppc-vec-merge.f90 b/flang/test/Lower/PowerPC/ppc-vec-merge.f90
index 1eaf1750b2252e2..0f950379e8c0f5c 100644
--- a/flang/test/Lower/PowerPC/ppc-vec-merge.f90
+++ b/flang/test/Lower/PowerPC/ppc-vec-merge.f90
@@ -1,6 +1,4 @@
-! RUN: bbc -emit-fir %s -o - | FileCheck --check-prefixes="CHECK-FIR" %s
-! RUN: %flang_fc1 -emit-fir %s -o - | fir-opt --fir-to-llvm-ir | FileCheck --check-prefixes="CHECK-LLVMIR" %s
-! RUN: %flang_fc1 -emit-llvm %s -o - | FileCheck --check-prefixes="CHECK" %s
+! RUN: %flang_fc1 -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes="LLVMIR" %s
 ! REQUIRES: target=powerpc{{.*}}
 
 !------------
@@ -12,23 +10,10 @@ subroutine vec_mergeh_test_i1(arg1, arg2)
   vector(integer(1)) :: arg1, arg2, r
   r = vec_mergeh(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<16xi8>) -> !fir.vector<16:i8>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23] : vector<16xi8>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<16xi8>>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[arg1]], <16 x i8> %[[arg2]], <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
-! CHECK: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[arg1]], <16 x i8> %[[arg2]], <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+! LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_mergeh_test_i1
 
 ! CHECK-LABEL: vec_mergeh_test_i2
@@ -36,23 +21,10 @@ subroutine vec_mergeh_test_i2(arg1, arg2)
   vector(integer(2)) :: arg1, arg2, r
   r = vec_mergeh(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [0, 8, 1, 9, 2, 10, 3, 11] : vector<8xi16>, vector<8xi16>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<8xi16>) -> !fir.vector<8:i16>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [0, 8, 1, 9, 2, 10, 3, 11] : vector<8xi16>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<8xi16>>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[r:.*]] = shufflevector <8 x i16> %[[arg1]], <8 x i16> %[[arg2]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
-! CHECK: store <8 x i16> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[r:.*]] = shufflevector <8 x i16> %[[arg1]], <8 x i16> %[[arg2]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+! LLVMIR: store <8 x i16> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_mergeh_test_i2
 
 ! CHECK-LABEL: vec_mergeh_test_i4
@@ -60,23 +32,10 @@ subroutine vec_mergeh_test_i4(arg1, arg2)
   vector(integer(4)) :: arg1, arg2, r
   r = vec_mergeh(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [0, 4, 1, 5] : vector<4xi32>, vector<4xi32>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [0, 4, 1, 5] : vector<4xi32>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<4xi32>>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[r:.*]] = shufflevector <4 x i32> %[[arg1]], <4 x i32> %[[arg2]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
-! CHECK: store <4 x i32> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[r:.*]] = shufflevector <4 x i32> %[[arg1]], <4 x i32> %[[arg2]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+! LLVMIR: store <4 x i32> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_mergeh_test_i4
 
 ! CHECK-LABEL: vec_mergeh_test_i8
@@ -84,23 +43,10 @@ subroutine vec_mergeh_test_i8(arg1, arg2)
   vector(integer(8)) :: arg1, arg2, r
   r = vec_mergeh(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [0, 2] : vector<2xi64>, vector<2xi64>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<2xi64>) -> !fir.vector<2:i64>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [0, 2] : vector<2xi64>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<2xi64>>
-
-! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[r:.*]] = shufflevector <2 x i64> %[[arg1]], <2 x i64> %[[arg2]], <2 x i32> <i32 0, i32 2>
-! CHECK: store <2 x i64> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[r:.*]] = shufflevector <2 x i64> %[[arg1]], <2 x i64> %[[arg2]], <2 x i32> <i32 0, i32 2>
+! LLVMIR: store <2 x i64> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_mergeh_test_i8
 
 ! CHECK-LABEL: vec_mergeh_test_u1
@@ -108,23 +54,10 @@ subroutine vec_mergeh_test_u1(arg1, arg2)
   vector(unsigned(1)) :: arg1, arg2, r
   r = vec_mergeh(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23] : vector<16xi8>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<16xi8>>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[arg1]], <16 x i8> %[[arg2]], <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
-! CHECK: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[arg1]], <16 x i8> %[[arg2]], <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+! LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_mergeh_test_u1
 
 ! CHECK-LABEL: vec_mergeh_test_u2
@@ -132,23 +65,10 @@ subroutine vec_mergeh_test_u2(arg1, arg2)
   vector(unsigned(2)) :: arg1, arg2, r
   r = vec_mergeh(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [0, 8, 1, 9, 2, 10, 3, 11] : vector<8xi16>, vector<8xi16>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<8xi16>) -> !fir.vector<8:ui16>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [0, 8, 1, 9, 2, 10, 3, 11] : vector<8xi16>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<8xi16>>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[r:.*]] = shufflevector <8 x i16> %[[arg1]], <8 x i16> %[[arg2]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
-! CHECK: store <8 x i16> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[r:.*]] = shufflevector <8 x i16> %[[arg1]], <8 x i16> %[[arg2]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+! LLVMIR: store <8 x i16> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_mergeh_test_u2
 
 ! CHECK-LABEL: vec_mergeh_test_u4
@@ -156,23 +76,10 @@ subroutine vec_mergeh_test_u4(arg1, arg2)
   vector(unsigned(4)) :: arg1, arg2, r
   r = vec_mergeh(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [0, 4, 1, 5] : vector<4xi32>, vector<4xi32>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<4xi32>) -> !fir.vector<4:ui32>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [0, 4, 1, 5] : vector<4xi32>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<4xi32>>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[r:.*]] = shufflevector <4 x i32> %[[arg1]], <4 x i32> %[[arg2]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
-! CHECK: store <4 x i32> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[r:.*]] = shufflevector <4 x i32> %[[arg1]], <4 x i32> %[[arg2]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+! LLVMIR: store <4 x i32> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_mergeh_test_u4
 
 ! CHECK-LABEL: vec_mergeh_test_u8
@@ -180,23 +87,10 @@ subroutine vec_mergeh_test_u8(arg1, arg2)
   vector(unsigned(8)) :: arg1, arg2, r
   r = vec_mergeh(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [0, 2] : vector<2xi64>, vector<2xi64>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<2xi64>) -> !fir.vector<2:ui64>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [0, 2] : vector<2xi64>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<2xi64>>
-
-! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[r:.*]] = shufflevector <2 x i64> %[[arg1]], <2 x i64> %[[arg2]], <2 x i32> <i32 0, i32 2>
-! CHECK: store <2 x i64> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[r:.*]] = shufflevector <2 x i64> %[[arg1]], <2 x i64> %[[arg2]], <2 x i32> <i32 0, i32 2>
+! LLVMIR: store <2 x i64> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_mergeh_test_u8
 
 ! CHECK-LABEL: vec_mergeh_test_r4
@@ -204,23 +98,10 @@ subroutine vec_mergeh_test_r4(arg1, arg2)
   vector(real(4)) :: arg1, arg2, r
   r = vec_mergeh(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [0, 4, 1, 5] : vector<4xf32>, vector<4xf32>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [0, 4, 1, 5] : vector<4xf32>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<4xf32>>
-
-! CHECK: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! CHECK: %[[r:.*]] = shufflevector <4 x float> %[[arg1]], <4 x float> %[[arg2]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
-! CHECK: store <4 x float> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! LLVMIR: %[[r:.*]] = shufflevector <4 x float> %[[arg1]], <4 x float> %[[arg2]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+! LLVMIR: store <4 x float> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_mergeh_test_r4
 
 ! CHECK-LABEL: vec_mergeh_test_r8
@@ -228,23 +109,10 @@ subroutine vec_mergeh_test_r8(arg1, arg2)
   vector(real(8)) :: arg1, arg2, r
   r = vec_mergeh(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [0, 2] : vector<2xf64>, vector<2xf64>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<2xf64>) -> !fir.vector<2:f64>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [0, 2] : vector<2xf64>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<2xf64>>
-
-! CHECK: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16
-! CHECK: %[[r:.*]] = shufflevector <2 x double> %[[arg1]], <2 x double> %[[arg2]], <2 x i32> <i32 0, i32 2>
-! CHECK: store <2 x double> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! LLVMIR: %[[r:.*]] = shufflevector <2 x double> %[[arg1]], <2 x double> %[[arg2]], <2 x i32> <i32 0, i32 2>
+! LLVMIR: store <2 x double> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_mergeh_test_r8
 
 !------------
@@ -256,23 +124,10 @@ subroutine vec_mergel_test_i1(arg1, arg2)
   vector(integer(1)) :: arg1, arg2, r
   r = vec_mergel(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<16xi8>) -> !fir.vector<16:i8>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31] : vector<16xi8>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<16xi8>>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[arg1]], <16 x i8> %[[arg2]], <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
-! CHECK: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[arg1]], <16 x i8> %[[arg2]], <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+! LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_mergel_test_i1
 
 ! CHECK-LABEL: vec_mergel_test_i2
@@ -280,23 +135,10 @@ subroutine vec_mergel_test_i2(arg1, arg2)
   vector(integer(2)) :: arg1, arg2, r
   r = vec_mergel(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [4, 12, 5, 13, 6, 14, 7, 15] : vector<8xi16>, vector<8xi16>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<8xi16>) -> !fir.vector<8:i16>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [4, 12, 5, 13, 6, 14, 7, 15] : vector<8xi16>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<8xi16>>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[r:.*]] = shufflevector <8 x i16> %[[arg1]], <8 x i16> %[[arg2]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
-! CHECK: store <8 x i16> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[r:.*]] = shufflevector <8 x i16> %[[arg1]], <8 x i16> %[[arg2]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+! LLVMIR: store <8 x i16> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_mergel_test_i2
 
 ! CHECK-LABEL: vec_mergel_test_i4
@@ -304,23 +146,10 @@ subroutine vec_mergel_test_i4(arg1, arg2)
   vector(integer(4)) :: arg1, arg2, r
   r = vec_mergel(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [2, 6, 3, 7] : vector<4xi32>, vector<4xi32>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [2, 6, 3, 7] : vector<4xi32>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<4xi32>>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[r:.*]] = shufflevector <4 x i32> %[[arg1]], <4 x i32> %[[arg2]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
-! CHECK: store <4 x i32> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[r:.*]] = shufflevector <4 x i32> %[[arg1]], <4 x i32> %[[arg2]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+! LLVMIR: store <4 x i32> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_mergel_test_i4
 
 ! CHECK-LABEL: vec_mergel_test_i8
@@ -328,23 +157,10 @@ subroutine vec_mergel_test_i8(arg1, arg2)
   vector(integer(8)) :: arg1, arg2, r
   r = vec_mergel(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [1, 3] : vector<2xi64>, vector<2xi64>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<2xi64>) -> !fir.vector<2:i64>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [1, 3] : vector<2xi64>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<2xi64>>
-
-! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[r:.*]] = shufflevector <2 x i64> %[[arg1]], <2 x i64> %[[arg2]], <2 x i32> <i32 1, i32 3>
-! CHECK: store <2 x i64> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[r:.*]] = shufflevector <2 x i64> %[[arg1]], <2 x i64> %[[arg2]], <2 x i32> <i32 1, i32 3>
+! LLVMIR: store <2 x i64> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_mergel_test_i8
 
 ! CHECK-LABEL: vec_mergel_test_u1
@@ -352,23 +168,10 @@ subroutine vec_mergel_test_u1(arg1, arg2)
   vector(unsigned(1)) :: arg1, arg2, r
   r = vec_mergel(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31] : vector<16xi8>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<16xi8>>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[arg1]], <16 x i8> %[[arg2]], <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
-! CHECK: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[arg1]], <16 x i8> %[[arg2]], <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+! LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_mergel_test_u1
 
 ! CHECK-LABEL: vec_mergel_test_u2
@@ -376,23 +179,10 @@ subroutine vec_mergel_test_u2(arg1, arg2)
   vector(unsigned(2)) :: arg1, arg2, r
   r = vec_mergel(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [4, 12, 5, 13, 6, 14, 7, 15] : vector<8xi16>, vector<8xi16>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<8xi16>) -> !fir.vector<8:ui16>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [4, 12, 5, 13, 6, 14, 7, 15] : vector<8xi16>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<8xi16>>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[r:.*]] = shufflevector <8 x i16> %[[arg1]], <8 x i16> %[[arg2]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
-! CHECK: store <8 x i16> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[r:.*]] = shufflevector <8 x i16> %[[arg1]], <8 x i16> %[[arg2]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+! LLVMIR: store <8 x i16> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_mergel_test_u2
 
 ! CHECK-LABEL: vec_mergel_test_u4
@@ -400,23 +190,10 @@ subroutine vec_mergel_test_u4(arg1, arg2)
   vector(unsigned(4)) :: arg1, arg2, r
   r = vec_mergel(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [2, 6, 3, 7] : vector<4xi32>, vector<4xi32>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<4xi32>) -> !fir.vector<4:ui32>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [2, 6, 3, 7] : vector<4xi32>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<4xi32>>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[r:.*]] = shufflevector <4 x i32> %[[arg1]], <4 x i32> %[[arg2]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
-! CHECK: store <4 x i32> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[r:.*]] = shufflevector <4 x i32> %[[arg1]], <4 x i32> %[[arg2]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+! LLVMIR: store <4 x i32> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_mergel_test_u4
 
 ! CHECK-LABEL: vec_mergel_test_u8
@@ -424,23 +201,10 @@ subroutine vec_mergel_test_u8(arg1, arg2)
   vector(unsigned(8)) :: arg1, arg2, r
   r = vec_mergel(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [1, 3] : vector<2xi64>, vector<2xi64>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<2xi64>) -> !fir.vector<2:ui64>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [1, 3] : vector<2xi64>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<2xi64>>
-
-! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[r:.*]] = shufflevector <2 x i64> %[[arg1]], <2 x i64> %[[arg2]], <2 x i32> <i32 1, i32 3>
-! CHECK: store <2 x i64> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[r:.*]] = shufflevector <2 x i64> %[[arg1]], <2 x i64> %[[arg2]], <2 x i32> <i32 1, i32 3>
+! LLVMIR: store <2 x i64> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_mergel_test_u8
 
 ! CHECK-LABEL: vec_mergel_test_r4
@@ -448,23 +212,10 @@ subroutine vec_mergel_test_r4(arg1, arg2)
   vector(real(4)) :: arg1, arg2, r
   r = vec_mergel(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [2, 6, 3, 7] : vector<4xf32>, vector<4xf32>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [2, 6, 3, 7] : vector<4xf32>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<4xf32>>
-
-! CHECK: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! CHECK: %[[r:.*]] = shufflevector <4 x float> %[[arg1]], <4 x float> %[[arg2]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
-! CHECK: store <4 x float> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! LLVMIR: %[[r:.*]] = shufflevector <4 x float> %[[arg1]], <4 x float> %[[arg2]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+! LLVMIR: store <4 x float> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_mergel_test_r4
 
 ! CHECK-LABEL: vec_mergel_test_r8
@@ -472,21 +223,8 @@ subroutine vec_mergel_test_r8(arg1, arg2)
   vector(real(8)) :: arg1, arg2, r
   r = vec_mergel(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [1, 3] : vector<2xf64>, vector<2xf64>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<2xf64>) -> !fir.vector<2:f64>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [1, 3] : vector<2xf64>
-! CHECK-LLVMIR: llvm.store %[[r]], %{{.*}} : !llvm.ptr<vector<2xf64>>
-
-! CHECK: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16
-! CHECK: %[[r:.*]] = shufflevector <2 x double> %[[arg1]], <2 x double> %[[arg2]], <2 x i32> <i32 1, i32 3>
-! CHECK: store <2 x double> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! LLVMIR: %[[r:.*]] = shufflevector <2 x double> %[[arg1]], <2 x double> %[[arg2]], <2 x i32> <i32 1, i32 3>
+! LLVMIR: store <2 x double> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_mergel_test_r8
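With the FIR and MLIR prefixes dropped, every merge test above reduces to the same short LLVMIR pattern: load both operands, shufflevector with the merge mask, store the result. A minimal sketch of the new test shape, using a hypothetical subroutine name but the same RUN line and checks as vec_mergel_test_i4 above:

! RUN: %flang_fc1 -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes="LLVMIR" %s
! REQUIRES: target=powerpc{{.*}}
subroutine vec_mergel_sketch(arg1, arg2)
  ! vec_mergel interleaves the low halves of the two operands
  ! (elements 2,3 of arg1 with elements 2,3 of arg2).
  vector(integer(4)) :: arg1, arg2, r
  r = vec_mergel(arg1, arg2)
! LLVMIR: %[[a:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
! LLVMIR: %[[b:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
! LLVMIR: %[[r:.*]] = shufflevector <4 x i32> %[[a]], <4 x i32> %[[b]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
! LLVMIR: store <4 x i32> %[[r]], ptr %{{.*}}, align 16
end subroutine vec_mergel_sketch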

diff  --git a/flang/test/Lower/PowerPC/ppc-vec-perm-elem-order.f90 b/flang/test/Lower/PowerPC/ppc-vec-perm-elem-order.f90
index d823ec35f81da64..f0ec054e13d8bf4 100644
--- a/flang/test/Lower/PowerPC/ppc-vec-perm-elem-order.f90
+++ b/flang/test/Lower/PowerPC/ppc-vec-perm-elem-order.f90
@@ -1,5 +1,4 @@
-! RUN: %flang_fc1 -emit-fir %s -fno-ppc-native-vector-element-order -triple ppc64le-unknown-linux -o - | FileCheck --check-prefixes="FIR" %s
-! RUN: %flang_fc1 -emit-llvm %s -fno-ppc-native-vector-element-order -triple ppc64le-unknwon-linux -o - | FileCheck --check-prefixes="LLVMIR" %s
+! RUN: %flang_fc1 -flang-experimental-hlfir -emit-llvm %s -fno-ppc-native-vector-element-order -triple ppc64le-unknown-linux -o - | FileCheck --check-prefixes="LLVMIR" %s
 ! REQUIRES: target=powerpc{{.*}}
 
 !----------------
@@ -12,20 +11,6 @@ subroutine vec_perm_test_i1(arg1, arg2, arg3)
   vector(unsigned(1)) :: arg3
   r = vec_perm(arg1, arg2, arg3)
 
-! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! FIR: %[[arg3:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! FIR: %[[carg3:.*]] = fir.convert %[[arg3]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<16xi8> to vector<4xi32>
-! FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<16xi8> to vector<4xi32>
-! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.vperm(%[[barg1]], %[[barg2]], %[[carg3]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>, vector<16xi8>) -> !fir.vector<4:i32>
-! FIR: %[[vcall:.*]] = fir.convert %[[call]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! FIR: %[[bcall:.*]] = llvm.bitcast %[[vcall]] : vector<4xi32> to vector<16xi8>
-! FIR: %[[ccall:.*]] = fir.convert %[[bcall]] : (vector<16xi8>) -> !fir.vector<16:i8>
-! FIR: fir.store %[[ccall]] to %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-
 ! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
 ! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
 ! LLVMIR: %[[arg3:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
@@ -45,14 +30,6 @@ subroutine vec_permi_test_i8i2(arg1, arg2, arg3)
   vector(integer(8)) :: arg1, arg2, r
   r = vec_permi(arg1, arg2, 2_2)
 
-! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! FIR: %[[shuf:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [3, 0] : vector<2xi64>, vector<2xi64>
-! FIR: %[[cshuf:.*]] = fir.convert %[[shuf]] : (vector<2xi64>) -> !fir.vector<2:i64>
-! FIR: fir.store %[[cshuf]] to %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-
 ! LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
 ! LLVMIR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
 ! LLVMIR: %[[shuf:.*]] = shufflevector <2 x i64> %[[arg1]], <2 x i64> %[[arg2]], <2 x i32> <i32 3, i32 0>
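The ppc-vec-perm.f90 checks that follow reduce the same way, and the surviving LLVMIR lines also record the lowering strategy for native element order: the permute control vector is complemented (xor with an all-ones <16 x i8>) and the two data operands are passed to @llvm.ppc.altivec.vperm in swapped order. A condensed sketch with a hypothetical subroutine name, abbreviated from the vec_perm_test_i4 checks below:

! RUN: %flang_fc1 -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes="LLVMIR" %s
! REQUIRES: target=powerpc{{.*}}
subroutine vec_perm_sketch(arg1, arg2, arg3)
  ! vec_perm selects bytes from the concatenation of arg1 and arg2 under
  ! control of arg3; the lowering complements arg3 and swaps the operands.
  vector(integer(4)) :: arg1, arg2, r
  vector(unsigned(1)) :: arg3
  r = vec_perm(arg1, arg2, arg3)
! LLVMIR: %[[c:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
! LLVMIR: %[[x:.*]] = xor <16 x i8> %[[c]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
! LLVMIR: %[[p:.*]] = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <16 x i8> %[[x]])
! LLVMIR: store <4 x i32> %[[p]], ptr %{{.*}}, align 16
end subroutine vec_perm_sketch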

diff  --git a/flang/test/Lower/PowerPC/ppc-vec-perm.f90 b/flang/test/Lower/PowerPC/ppc-vec-perm.f90
index a799b252b3a45a7..99a6295a014943f 100644
--- a/flang/test/Lower/PowerPC/ppc-vec-perm.f90
+++ b/flang/test/Lower/PowerPC/ppc-vec-perm.f90
@@ -1,6 +1,4 @@
-! RUN: bbc -emit-fir %s -o - | FileCheck --check-prefixes="CHECK-FIR" %s
-! RUN: %flang_fc1 -emit-fir %s -o - | fir-opt --fir-to-llvm-ir | FileCheck --check-prefixes="CHECK-LLVMIR" %s
-! RUN: %flang_fc1 -emit-llvm %s -o - | FileCheck --check-prefixes="CHECK" %s
+! RUN: %flang_fc1 -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes="LLVMIR" %s
 ! REQUIRES: target=powerpc{{.*}}
 
 ! CHECK-LABEL: vec_perm_test_i1
@@ -9,44 +7,15 @@ subroutine vec_perm_test_i1(arg1, arg2, arg3)
   vector(unsigned(1)) :: arg3
   r = vec_perm(arg1, arg2, arg3)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[arg3:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[carg3:.*]] = fir.convert %[[arg3]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[const:.*]] = arith.constant -1 : i8
-! CHECK-FIR: %[[vconst:.*]] = vector.broadcast %[[const]] : i8 to vector<16xi8>
-! CHECK-FIR: %[[xor:.*]] = arith.xori %[[carg3]], %[[vconst]] : vector<16xi8>
-! CHECK-FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.vperm(%[[barg2]], %[[barg1]], %[[xor]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>, vector<16xi8>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[call2:.*]] = fir.convert %[[call]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcall:.*]] = llvm.bitcast %[[call2]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %[[ccall:.*]] = fir.convert %[[bcall]] : (vector<16xi8>) -> !fir.vector<16:i8>
-! CHECK-FIR: fir.store %[[ccall]] to %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[arg3:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[barg1:.*]] = llvm.bitcast %[[arg1]] : vector<16xi8> to vector<4xi32>
-! CHECK-LLVMIR: %[[barg2:.*]] = llvm.bitcast %[[arg2]] : vector<16xi8> to vector<4xi32>
-! CHECK-LLVMIR: %[[const:.*]] = llvm.mlir.constant(-1 : i8) : i8
-! CHECK-LLVMIR: %[[vconst:.*]] = llvm.mlir.constant(dense<-1> : vector<16xi8>) : vector<16xi8>
-! CHECK-LLVMIR: %[[xor:.*]] = llvm.xor %[[arg3]], %[[vconst]]  : vector<16xi8>
-! CHECK-LLVMIR: %[[call:.*]] = llvm.call @llvm.ppc.altivec.vperm(%[[barg2]], %[[barg1]], %[[xor]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>, vector<16xi8>) -> vector<4xi32>
-! CHECK-LLVMIR: %[[bcall:.*]] = llvm.bitcast %[[call]] : vector<4xi32> to vector<16xi8>
-! CHECK-LLVMIR: llvm.store %[[bcall]], %{{.*}} : !llvm.ptr<vector<16xi8>>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg3:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <16 x i8> %[[arg1]] to <4 x i32>
-! CHECK: %[[barg2:.*]] = bitcast <16 x i8> %[[arg2]] to <4 x i32>
-! CHECK: %[[xor:.*]] = xor <16 x i8> %[[arg3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
-! CHECK: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> %[[barg2]], <4 x i32> %[[barg1]], <16 x i8> %[[xor]])
-! CHECK: %[[bcall:.*]] = bitcast <4 x i32> %[[call]] to <16 x i8>
-! CHECK: store <16 x i8> %[[bcall]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg3:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <16 x i8> %[[arg1]] to <4 x i32>
+! LLVMIR: %[[barg2:.*]] = bitcast <16 x i8> %[[arg2]] to <4 x i32>
+! LLVMIR: %[[xor:.*]] = xor <16 x i8> %[[arg3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> %[[barg2]], <4 x i32> %[[barg1]], <16 x i8> %[[xor]])
+! LLVMIR: %[[bcall:.*]] = bitcast <4 x i32> %[[call]] to <16 x i8>
+! LLVMIR: store <16 x i8> %[[bcall]], ptr %{{.*}}, align 16
 end subroutine vec_perm_test_i1
 
 ! CHECK-LABEL: vec_perm_test_i2
@@ -55,44 +24,15 @@ subroutine vec_perm_test_i2(arg1, arg2, arg3)
   vector(unsigned(1)) :: arg3
   r = vec_perm(arg1, arg2, arg3)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[arg3:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[carg3:.*]] = fir.convert %[[arg3]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<8xi16> to vector<4xi32>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<8xi16> to vector<4xi32>
-! CHECK-FIR: %[[const:.*]] = arith.constant -1 : i8
-! CHECK-FIR: %[[vconst:.*]] = vector.broadcast %[[const]] : i8 to vector<16xi8>
-! CHECK-FIR: %[[xor:.*]] = arith.xori %[[carg3]], %[[vconst]] : vector<16xi8>
-! CHECK-FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.vperm(%[[barg2]], %[[barg1]], %[[xor]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>, vector<16xi8>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[call2:.*]] = fir.convert %[[call]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcall:.*]] = llvm.bitcast %[[call2]] : vector<4xi32> to vector<8xi16>
-! CHECK-FIR: %[[ccall:.*]] = fir.convert %[[bcall]] : (vector<8xi16>) -> !fir.vector<8:i16>
-! CHECK-FIR: fir.store %[[ccall]] to %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[arg3:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[barg1:.*]] = llvm.bitcast %[[arg1]] : vector<8xi16> to vector<4xi32>
-! CHECK-LLVMIR: %[[barg2:.*]] = llvm.bitcast %[[arg2]] : vector<8xi16> to vector<4xi32>
-! CHECK-LLVMIR: %[[const:.*]] = llvm.mlir.constant(-1 : i8) : i8
-! CHECK-LLVMIR: %[[vconst:.*]] = llvm.mlir.constant(dense<-1> : vector<16xi8>) : vector<16xi8>
-! CHECK-LLVMIR: %[[xor:.*]] = llvm.xor %[[arg3]], %[[vconst]]  : vector<16xi8>
-! CHECK-LLVMIR: %[[call:.*]] = llvm.call @llvm.ppc.altivec.vperm(%[[barg2]], %[[barg1]], %[[xor]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>, vector<16xi8>) -> vector<4xi32>
-! CHECK-LLVMIR: %[[bcall:.*]] = llvm.bitcast %[[call]] : vector<4xi32> to vector<8xi16>
-! CHECK-LLVMIR: llvm.store %[[bcall]], %{{.*}} : !llvm.ptr<vector<8xi16>>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg3:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32>
-! CHECK: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <4 x i32>
-! CHECK: %[[xor:.*]] = xor <16 x i8> %[[arg3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
-! CHECK: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> %[[barg2]], <4 x i32> %[[barg1]], <16 x i8> %[[xor]])
-! CHECK: %[[bcall:.*]] = bitcast <4 x i32> %[[call]] to <8 x i16>
-! CHECK: store <8 x i16> %[[bcall]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg3:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32>
+! LLVMIR: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <4 x i32>
+! LLVMIR: %[[xor:.*]] = xor <16 x i8> %[[arg3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> %[[barg2]], <4 x i32> %[[barg1]], <16 x i8> %[[xor]])
+! LLVMIR: %[[bcall:.*]] = bitcast <4 x i32> %[[call]] to <8 x i16>
+! LLVMIR: store <8 x i16> %[[bcall]], ptr %{{.*}}, align 16
 end subroutine vec_perm_test_i2
 
 ! CHECK-LABEL: vec_perm_test_i4
@@ -101,33 +41,12 @@ subroutine vec_perm_test_i4(arg1, arg2, arg3)
   vector(unsigned(1)) :: arg3
   r = vec_perm(arg1, arg2, arg3)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[arg3:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[carg3:.*]] = fir.convert %[[arg3]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[const:.*]] = arith.constant -1 : i8
-! CHECK-FIR: %[[vconst:.*]] = vector.broadcast %[[const]] : i8 to vector<16xi8>
-! CHECK-FIR: %[[xor:.*]] = arith.xori %[[carg3]], %[[vconst]] : vector<16xi8>
-! CHECK-FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.vperm(%[[carg2]], %[[carg1]], %[[xor]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>, vector<16xi8>) -> !fir.vector<4:i32>
-! CHECK-FIR: fir.store %[[call]] to %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg3:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[const:.*]] = llvm.mlir.constant(-1 : i8) : i8
-! CHECK-LLVMIR: %[[vconst:.*]] = llvm.mlir.constant(dense<-1> : vector<16xi8>) : vector<16xi8>
-! CHECK-LLVMIR: %[[xor:.*]] = llvm.xor %[[arg3]], %[[vconst]]  : vector<16xi8>
-! CHECK-LLVMIR: %[[call:.*]] = llvm.call @llvm.ppc.altivec.vperm(%[[arg2]], %[[arg1]], %[[xor]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>, vector<16xi8>) -> vector<4xi32>
-! CHECK-LLVMIR: llvm.store %[[call]], %{{.*}} : !llvm.ptr<vector<4xi32>>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg3:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[xor:.*]] = xor <16 x i8> %[[arg3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
-! CHECK: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> %[[arg2]], <4 x i32> %[[arg1]], <16 x i8> %[[xor]])
-! CHECK: store <4 x i32> %[[call]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg3:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[xor:.*]] = xor <16 x i8> %[[arg3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> %[[arg2]], <4 x i32> %[[arg1]], <16 x i8> %[[xor]])
+! LLVMIR: store <4 x i32> %[[call]], ptr %{{.*}}, align 16
 end subroutine vec_perm_test_i4
 
 ! CHECK-LABEL: vec_perm_test_i8
@@ -136,44 +55,15 @@ subroutine vec_perm_test_i8(arg1, arg2, arg3)
   vector(unsigned(1)) :: arg3
   r = vec_perm(arg1, arg2, arg3)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[arg3:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! CHECK-FIR: %[[carg3:.*]] = fir.convert %[[arg3]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<2xi64> to vector<4xi32>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<2xi64> to vector<4xi32>
-! CHECK-FIR: %[[const:.*]] = arith.constant -1 : i8
-! CHECK-FIR: %[[vconst:.*]] = vector.broadcast %[[const]] : i8 to vector<16xi8>
-! CHECK-FIR: %[[xor:.*]] = arith.xori %[[carg3]], %[[vconst]] : vector<16xi8>
-! CHECK-FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.vperm(%[[barg2]], %[[barg1]], %[[xor]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>, vector<16xi8>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[call2:.*]] = fir.convert %[[call]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcall:.*]] = llvm.bitcast %[[call2]] : vector<4xi32> to vector<2xi64>
-! CHECK-FIR: %[[ccall:.*]] = fir.convert %[[bcall]] : (vector<2xi64>) -> !fir.vector<2:i64>
-! CHECK-FIR: fir.store %[[ccall]] to %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[arg3:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[barg1:.*]] = llvm.bitcast %[[arg1]] : vector<2xi64> to vector<4xi32>
-! CHECK-LLVMIR: %[[barg2:.*]] = llvm.bitcast %[[arg2]] : vector<2xi64> to vector<4xi32>
-! CHECK-LLVMIR: %[[const:.*]] = llvm.mlir.constant(-1 : i8) : i8
-! CHECK-LLVMIR: %[[vconst:.*]] = llvm.mlir.constant(dense<-1> : vector<16xi8>) : vector<16xi8>
-! CHECK-LLVMIR: %[[xor:.*]] = llvm.xor %[[arg3]], %[[vconst]]  : vector<16xi8>
-! CHECK-LLVMIR: %[[call:.*]] = llvm.call @llvm.ppc.altivec.vperm(%[[barg2]], %[[barg1]], %[[xor]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>, vector<16xi8>) -> vector<4xi32>
-! CHECK-LLVMIR: %[[bcall:.*]] = llvm.bitcast %[[call]] : vector<4xi32> to vector<2xi64>
-! CHECK-LLVMIR: llvm.store %[[bcall]], %{{.*}} : !llvm.ptr<vector<2xi64>>
-
-! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[arg3:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <2 x i64> %[[arg1]] to <4 x i32>
-! CHECK: %[[barg2:.*]] = bitcast <2 x i64> %[[arg2]] to <4 x i32>
-! CHECK: %[[xor:.*]] = xor <16 x i8> %[[arg3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
-! CHECK: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> %[[barg2]], <4 x i32> %[[barg1]], <16 x i8> %[[xor]])
-! CHECK: %[[bcall:.*]] = bitcast <4 x i32> %[[call]] to <2 x i64>
-! CHECK: store <2 x i64> %[[bcall]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg3:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <2 x i64> %[[arg1]] to <4 x i32>
+! LLVMIR: %[[barg2:.*]] = bitcast <2 x i64> %[[arg2]] to <4 x i32>
+! LLVMIR: %[[xor:.*]] = xor <16 x i8> %[[arg3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> %[[barg2]], <4 x i32> %[[barg1]], <16 x i8> %[[xor]])
+! LLVMIR: %[[bcall:.*]] = bitcast <4 x i32> %[[call]] to <2 x i64>
+! LLVMIR: store <2 x i64> %[[bcall]], ptr %{{.*}}, align 16
 end subroutine vec_perm_test_i8
 
 ! CHECK-LABEL: vec_perm_test_u1
@@ -182,44 +72,15 @@ subroutine vec_perm_test_u1(arg1, arg2, arg3)
   vector(unsigned(1)) :: arg3
   r = vec_perm(arg1, arg2, arg3)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[arg3:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[carg3:.*]] = fir.convert %[[arg3]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[const:.*]] = arith.constant -1 : i8
-! CHECK-FIR: %[[vconst:.*]] = vector.broadcast %[[const]] : i8 to vector<16xi8>
-! CHECK-FIR: %[[xor:.*]] = arith.xori %[[carg3]], %[[vconst]] : vector<16xi8>
-! CHECK-FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.vperm(%[[barg2]], %[[barg1]], %[[xor]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>, vector<16xi8>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[call2:.*]] = fir.convert %[[call]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcall:.*]] = llvm.bitcast %[[call2]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %[[ccall:.*]] = fir.convert %[[bcall]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-! CHECK-FIR: fir.store %[[ccall]] to %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[arg3:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[barg1:.*]] = llvm.bitcast %[[arg1]] : vector<16xi8> to vector<4xi32>
-! CHECK-LLVMIR: %[[barg2:.*]] = llvm.bitcast %[[arg2]] : vector<16xi8> to vector<4xi32>
-! CHECK-LLVMIR: %[[const:.*]] = llvm.mlir.constant(-1 : i8) : i8
-! CHECK-LLVMIR: %[[vconst:.*]] = llvm.mlir.constant(dense<-1> : vector<16xi8>) : vector<16xi8>
-! CHECK-LLVMIR: %[[xor:.*]] = llvm.xor %[[arg3]], %[[vconst]]  : vector<16xi8>
-! CHECK-LLVMIR: %[[call:.*]] = llvm.call @llvm.ppc.altivec.vperm(%[[barg2]], %[[barg1]], %[[xor]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>, vector<16xi8>) -> vector<4xi32>
-! CHECK-LLVMIR: %[[bcall:.*]] = llvm.bitcast %[[call]] : vector<4xi32> to vector<16xi8>
-! CHECK-LLVMIR: llvm.store %[[bcall]], %{{.*}} : !llvm.ptr<vector<16xi8>>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg3:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <16 x i8> %[[arg1]] to <4 x i32>
-! CHECK: %[[barg2:.*]] = bitcast <16 x i8> %[[arg2]] to <4 x i32>
-! CHECK: %[[xor:.*]] = xor <16 x i8> %[[arg3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
-! CHECK: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> %[[barg2]], <4 x i32> %[[barg1]], <16 x i8> %[[xor]])
-! CHECK: %[[bcall:.*]] = bitcast <4 x i32> %[[call]] to <16 x i8>
-! CHECK: store <16 x i8> %[[bcall]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg3:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <16 x i8> %[[arg1]] to <4 x i32>
+! LLVMIR: %[[barg2:.*]] = bitcast <16 x i8> %[[arg2]] to <4 x i32>
+! LLVMIR: %[[xor:.*]] = xor <16 x i8> %[[arg3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> %[[barg2]], <4 x i32> %[[barg1]], <16 x i8> %[[xor]])
+! LLVMIR: %[[bcall:.*]] = bitcast <4 x i32> %[[call]] to <16 x i8>
+! LLVMIR: store <16 x i8> %[[bcall]], ptr %{{.*}}, align 16
 end subroutine vec_perm_test_u1
 
 ! CHECK-LABEL: vec_perm_test_u2
@@ -228,44 +89,15 @@ subroutine vec_perm_test_u2(arg1, arg2, arg3)
   vector(unsigned(1)) :: arg3
   r = vec_perm(arg1, arg2, arg3)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[arg3:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[carg3:.*]] = fir.convert %[[arg3]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<8xi16> to vector<4xi32>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<8xi16> to vector<4xi32>
-! CHECK-FIR: %[[const:.*]] = arith.constant -1 : i8
-! CHECK-FIR: %[[vconst:.*]] = vector.broadcast %[[const]] : i8 to vector<16xi8>
-! CHECK-FIR: %[[xor:.*]] = arith.xori %[[carg3]], %[[vconst]] : vector<16xi8>
-! CHECK-FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.vperm(%[[barg2]], %[[barg1]], %[[xor]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>, vector<16xi8>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[call2:.*]] = fir.convert %[[call]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcall:.*]] = llvm.bitcast %[[call2]] : vector<4xi32> to vector<8xi16>
-! CHECK-FIR: %[[ccall:.*]] = fir.convert %[[bcall]] : (vector<8xi16>) -> !fir.vector<8:ui16>
-! CHECK-FIR: fir.store %[[ccall]] to %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[arg3:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[barg1:.*]] = llvm.bitcast %[[arg1]] : vector<8xi16> to vector<4xi32>
-! CHECK-LLVMIR: %[[barg2:.*]] = llvm.bitcast %[[arg2]] : vector<8xi16> to vector<4xi32>
-! CHECK-LLVMIR: %[[const:.*]] = llvm.mlir.constant(-1 : i8) : i8
-! CHECK-LLVMIR: %[[vconst:.*]] = llvm.mlir.constant(dense<-1> : vector<16xi8>) : vector<16xi8>
-! CHECK-LLVMIR: %[[xor:.*]] = llvm.xor %[[arg3]], %[[vconst]]  : vector<16xi8>
-! CHECK-LLVMIR: %[[call:.*]] = llvm.call @llvm.ppc.altivec.vperm(%[[barg2]], %[[barg1]], %[[xor]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>, vector<16xi8>) -> vector<4xi32>
-! CHECK-LLVMIR: %[[bcall:.*]] = llvm.bitcast %[[call]] : vector<4xi32> to vector<8xi16>
-! CHECK-LLVMIR: llvm.store %[[bcall]], %{{.*}} : !llvm.ptr<vector<8xi16>>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg3:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32>
-! CHECK: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <4 x i32>
-! CHECK: %[[xor:.*]] = xor <16 x i8> %[[arg3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
-! CHECK: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> %[[barg2]], <4 x i32> %[[barg1]], <16 x i8> %[[xor]])
-! CHECK: %[[bcall:.*]] = bitcast <4 x i32> %[[call]] to <8 x i16>
-! CHECK: store <8 x i16> %[[bcall]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg3:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32>
+! LLVMIR: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <4 x i32>
+! LLVMIR: %[[xor:.*]] = xor <16 x i8> %[[arg3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> %[[barg2]], <4 x i32> %[[barg1]], <16 x i8> %[[xor]])
+! LLVMIR: %[[bcall:.*]] = bitcast <4 x i32> %[[call]] to <8 x i16>
+! LLVMIR: store <8 x i16> %[[bcall]], ptr %{{.*}}, align 16
 end subroutine vec_perm_test_u2
 
 ! CHECK-LABEL: vec_perm_test_u4
@@ -274,35 +106,12 @@ subroutine vec_perm_test_u4(arg1, arg2, arg3)
   vector(unsigned(1)) :: arg3
   r = vec_perm(arg1, arg2, arg3)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[arg3:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[carg3:.*]] = fir.convert %[[arg3]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[const:.*]] = arith.constant -1 : i8
-! CHECK-FIR: %[[vconst:.*]] = vector.broadcast %[[const]] : i8 to vector<16xi8>
-! CHECK-FIR: %[[xor:.*]] = arith.xori %[[carg3]], %[[vconst]] : vector<16xi8>
-! CHECK-FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.vperm(%[[carg2]], %[[carg1]], %[[xor]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>, vector<16xi8>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[call2:.*]] = fir.convert %[[call]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[ccall:.*]] = fir.convert %[[call2]] : (vector<4xi32>) -> !fir.vector<4:ui32>
-! CHECK-FIR: fir.store %[[ccall]] to %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg3:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[const:.*]] = llvm.mlir.constant(-1 : i8) : i8
-! CHECK-LLVMIR: %[[vconst:.*]] = llvm.mlir.constant(dense<-1> : vector<16xi8>) : vector<16xi8>
-! CHECK-LLVMIR: %[[xor:.*]] = llvm.xor %[[arg3]], %[[vconst]]  : vector<16xi8>
-! CHECK-LLVMIR: %[[call:.*]] = llvm.call @llvm.ppc.altivec.vperm(%[[arg2]], %[[arg1]], %[[xor]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>, vector<16xi8>) -> vector<4xi32>
-! CHECK-LLVMIR: llvm.store %[[call]], %{{.*}} : !llvm.ptr<vector<4xi32>>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg3:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[xor:.*]] = xor <16 x i8> %[[arg3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
-! CHECK: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> %[[arg2]], <4 x i32> %[[arg1]], <16 x i8> %[[xor]])
-! CHECK: store <4 x i32> %[[call]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg3:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[xor:.*]] = xor <16 x i8> %[[arg3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> %[[arg2]], <4 x i32> %[[arg1]], <16 x i8> %[[xor]])
+! LLVMIR: store <4 x i32> %[[call]], ptr %{{.*}}, align 16
 end subroutine vec_perm_test_u4
 
 ! CHECK-LABEL: vec_perm_test_u8
@@ -311,44 +120,15 @@ subroutine vec_perm_test_u8(arg1, arg2, arg3)
   vector(unsigned(1)) :: arg3
   r = vec_perm(arg1, arg2, arg3)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[arg3:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %[[carg3:.*]] = fir.convert %[[arg3]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<2xi64> to vector<4xi32>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<2xi64> to vector<4xi32>
-! CHECK-FIR: %[[const:.*]] = arith.constant -1 : i8
-! CHECK-FIR: %[[vconst:.*]] = vector.broadcast %[[const]] : i8 to vector<16xi8>
-! CHECK-FIR: %[[xor:.*]] = arith.xori %[[carg3]], %[[vconst]] : vector<16xi8>
-! CHECK-FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.vperm(%[[barg2]], %[[barg1]], %[[xor]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>, vector<16xi8>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[call2:.*]] = fir.convert %[[call]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcall:.*]] = llvm.bitcast %[[call2]] : vector<4xi32> to vector<2xi64>
-! CHECK-FIR: %[[ccall:.*]] = fir.convert %[[bcall]] : (vector<2xi64>) -> !fir.vector<2:ui64>
-! CHECK-FIR: fir.store %[[ccall]] to %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[arg3:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[barg1:.*]] = llvm.bitcast %[[arg1]] : vector<2xi64> to vector<4xi32>
-! CHECK-LLVMIR: %[[barg2:.*]] = llvm.bitcast %[[arg2]] : vector<2xi64> to vector<4xi32>
-! CHECK-LLVMIR: %[[const:.*]] = llvm.mlir.constant(-1 : i8) : i8
-! CHECK-LLVMIR: %[[vconst:.*]] = llvm.mlir.constant(dense<-1> : vector<16xi8>) : vector<16xi8>
-! CHECK-LLVMIR: %[[xor:.*]] = llvm.xor %[[arg3]], %[[vconst]]  : vector<16xi8>
-! CHECK-LLVMIR: %[[call:.*]] = llvm.call @llvm.ppc.altivec.vperm(%[[barg2]], %[[barg1]], %[[xor]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>, vector<16xi8>) -> vector<4xi32>
-! CHECK-LLVMIR: %[[bcall:.*]] = llvm.bitcast %[[call]] : vector<4xi32> to vector<2xi64>
-! CHECK-LLVMIR: llvm.store %[[bcall]], %{{.*}} : !llvm.ptr<vector<2xi64>>
-
-! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[arg3:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <2 x i64> %[[arg1]] to <4 x i32>
-! CHECK: %[[barg2:.*]] = bitcast <2 x i64> %[[arg2]] to <4 x i32>
-! CHECK: %[[xor:.*]] = xor <16 x i8> %[[arg3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
-! CHECK: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> %[[barg2]], <4 x i32> %[[barg1]], <16 x i8> %[[xor]])
-! CHECK: %[[bcall:.*]] = bitcast <4 x i32> %[[call]] to <2 x i64>
-! CHECK: store <2 x i64> %[[bcall]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg3:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <2 x i64> %[[arg1]] to <4 x i32>
+! LLVMIR: %[[barg2:.*]] = bitcast <2 x i64> %[[arg2]] to <4 x i32>
+! LLVMIR: %[[xor:.*]] = xor <16 x i8> %[[arg3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> %[[barg2]], <4 x i32> %[[barg1]], <16 x i8> %[[xor]])
+! LLVMIR: %[[bcall:.*]] = bitcast <4 x i32> %[[call]] to <2 x i64>
+! LLVMIR: store <2 x i64> %[[bcall]], ptr %{{.*}}, align 16
 end subroutine vec_perm_test_u8
 
 ! CHECK-LABEL: vec_perm_test_r4
@@ -357,44 +137,15 @@ subroutine vec_perm_test_r4(arg1, arg2, arg3)
   vector(unsigned(1)) :: arg3
   r = vec_perm(arg1, arg2, arg3)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[arg3:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[carg3:.*]] = fir.convert %[[arg3]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<4xf32> to vector<4xi32>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<4xf32> to vector<4xi32>
-! CHECK-FIR: %[[const:.*]] = arith.constant -1 : i8
-! CHECK-FIR: %[[vconst:.*]] = vector.broadcast %[[const]] : i8 to vector<16xi8>
-! CHECK-FIR: %[[xor:.*]] = arith.xori %[[carg3]], %[[vconst]] : vector<16xi8>
-! CHECK-FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.vperm(%[[barg2]], %[[barg1]], %[[xor]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>, vector<16xi8>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[call2:.*]] = fir.convert %[[call]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcall:.*]] = llvm.bitcast %[[call2]] : vector<4xi32> to vector<4xf32>
-! CHECK-FIR: %[[ccall:.*]] = fir.convert %[[bcall]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! CHECK-FIR: fir.store %[[ccall]] to %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[arg3:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[barg1:.*]] = llvm.bitcast %[[arg1]] : vector<4xf32> to vector<4xi32>
-! CHECK-LLVMIR: %[[barg2:.*]] = llvm.bitcast %[[arg2]] : vector<4xf32> to vector<4xi32>
-! CHECK-LLVMIR: %[[const:.*]] = llvm.mlir.constant(-1 : i8) : i8
-! CHECK-LLVMIR: %[[vconst:.*]] = llvm.mlir.constant(dense<-1> : vector<16xi8>) : vector<16xi8>
-! CHECK-LLVMIR: %[[xor:.*]] = llvm.xor %[[arg3]], %[[vconst]]  : vector<16xi8>
-! CHECK-LLVMIR: %[[call:.*]] = llvm.call @llvm.ppc.altivec.vperm(%[[barg2]], %[[barg1]], %[[xor]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>, vector<16xi8>) -> vector<4xi32>
-! CHECK-LLVMIR: %[[bcall:.*]] = llvm.bitcast %[[call]] : vector<4xi32> to vector<4xf32>
-! CHECK-LLVMIR: llvm.store %[[bcall]], %{{.*}} : !llvm.ptr<vector<4xf32>>
-
-! CHECK: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! CHECK: %[[arg3:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <4 x float> %[[arg1]] to <4 x i32>
-! CHECK: %[[barg2:.*]] = bitcast <4 x float> %[[arg2]] to <4 x i32>
-! CHECK: %[[xor:.*]] = xor <16 x i8> %[[arg3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
-! CHECK: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> %[[barg2]], <4 x i32> %[[barg1]], <16 x i8> %[[xor]])
-! CHECK: %[[bcall:.*]] = bitcast <4 x i32> %[[call]] to <4 x float>
-! CHECK: store <4 x float> %[[bcall]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg3:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <4 x float> %[[arg1]] to <4 x i32>
+! LLVMIR: %[[barg2:.*]] = bitcast <4 x float> %[[arg2]] to <4 x i32>
+! LLVMIR: %[[xor:.*]] = xor <16 x i8> %[[arg3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> %[[barg2]], <4 x i32> %[[barg1]], <16 x i8> %[[xor]])
+! LLVMIR: %[[bcall:.*]] = bitcast <4 x i32> %[[call]] to <4 x float>
+! LLVMIR: store <4 x float> %[[bcall]], ptr %{{.*}}, align 16
 end subroutine vec_perm_test_r4
 
 ! CHECK-LABEL: vec_perm_test_r8
@@ -403,44 +154,15 @@ subroutine vec_perm_test_r8(arg1, arg2, arg3)
   vector(unsigned(1)) :: arg3
   r = vec_perm(arg1, arg2, arg3)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[arg3:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! CHECK-FIR: %[[carg3:.*]] = fir.convert %[[arg3]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<2xf64> to vector<4xi32>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<2xf64> to vector<4xi32>
-! CHECK-FIR: %[[const:.*]] = arith.constant -1 : i8
-! CHECK-FIR: %[[vconst:.*]] = vector.broadcast %[[const]] : i8 to vector<16xi8>
-! CHECK-FIR: %[[xor:.*]] = arith.xori %[[carg3]], %[[vconst]] : vector<16xi8>
-! CHECK-FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.vperm(%[[barg2]], %[[barg1]], %[[xor]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>, vector<16xi8>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[call2:.*]] = fir.convert %[[call]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcall:.*]] = llvm.bitcast %[[call2]] : vector<4xi32> to vector<2xf64>
-! CHECK-FIR: %[[ccall:.*]] = fir.convert %[[bcall]] : (vector<2xf64>) -> !fir.vector<2:f64>
-! CHECK-FIR: fir.store %[[ccall]] to %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %[[arg3:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[barg1:.*]] = llvm.bitcast %[[arg1]] : vector<2xf64> to vector<4xi32>
-! CHECK-LLVMIR: %[[barg2:.*]] = llvm.bitcast %[[arg2]] : vector<2xf64> to vector<4xi32>
-! CHECK-LLVMIR: %[[const:.*]] = llvm.mlir.constant(-1 : i8) : i8
-! CHECK-LLVMIR: %[[vconst:.*]] = llvm.mlir.constant(dense<-1> : vector<16xi8>) : vector<16xi8>
-! CHECK-LLVMIR: %[[xor:.*]] = llvm.xor %[[arg3]], %[[vconst]]  : vector<16xi8>
-! CHECK-LLVMIR: %[[call:.*]] = llvm.call @llvm.ppc.altivec.vperm(%[[barg2]], %[[barg1]], %[[xor]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>, vector<16xi8>) -> vector<4xi32>
-! CHECK-LLVMIR: %[[bcall:.*]] = llvm.bitcast %[[call]] : vector<4xi32> to vector<2xf64>
-! CHECK-LLVMIR: llvm.store %[[bcall]], %{{.*}} : !llvm.ptr<vector<2xf64>>
-
-! CHECK: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16
-! CHECK: %[[arg3:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <2 x double> %[[arg1]] to <4 x i32>
-! CHECK: %[[barg2:.*]] = bitcast <2 x double> %[[arg2]] to <4 x i32>
-! CHECK: %[[xor:.*]] = xor <16 x i8> %[[arg3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
-! CHECK: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> %[[barg2]], <4 x i32> %[[barg1]], <16 x i8> %[[xor]])
-! CHECK: %[[bcall:.*]] = bitcast <4 x i32> %[[call]] to <2 x double>
-! CHECK: store <2 x double> %[[bcall]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg3:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <2 x double> %[[arg1]] to <4 x i32>
+! LLVMIR: %[[barg2:.*]] = bitcast <2 x double> %[[arg2]] to <4 x i32>
+! LLVMIR: %[[xor:.*]] = xor <16 x i8> %[[arg3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> %[[barg2]], <4 x i32> %[[barg1]], <16 x i8> %[[xor]])
+! LLVMIR: %[[bcall:.*]] = bitcast <4 x i32> %[[call]] to <2 x double>
+! LLVMIR: store <2 x double> %[[bcall]], ptr %{{.*}}, align 16
 end subroutine vec_perm_test_r8
 
 ! CHECK-LABEL: vec_permi_test_i8i1
@@ -448,23 +170,10 @@ subroutine vec_permi_test_i8i1(arg1, arg2, arg3)
   vector(integer(8)) :: arg1, arg2, r
   r = vec_permi(arg1, arg2, 3_1)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! CHECK-FIR: %[[shuf:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [1, 3] : vector<2xi64>, vector<2xi64>
-! CHECK-FIR: %[[cshuf:.*]] = fir.convert %[[shuf]] : (vector<2xi64>) -> !fir.vector<2:i64>
-! CHECK-FIR: fir.store %[[cshuf]] to %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[shuf:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [1, 3] : vector<2xi64>
-! CHECK-LLVMIR: llvm.store %[[shuf]], %{{.*}} : !llvm.ptr<vector<2xi64>>
-
-! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[shuf:.*]] = shufflevector <2 x i64> %[[arg1]], <2 x i64> %[[arg2]], <2 x i32> <i32 1, i32 3>
-! CHECK: store <2 x i64> %[[shuf]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[shuf:.*]] = shufflevector <2 x i64> %[[arg1]], <2 x i64> %[[arg2]], <2 x i32> <i32 1, i32 3>
+! LLVMIR: store <2 x i64> %[[shuf]], ptr %{{.*}}, align 16
 end subroutine vec_permi_test_i8i1
 
 ! CHECK-LABEL: vec_permi_test_i8i2
@@ -472,23 +181,10 @@ subroutine vec_permi_test_i8i2(arg1, arg2, arg3)
   vector(integer(8)) :: arg1, arg2, r
   r = vec_permi(arg1, arg2, 2_2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! CHECK-FIR: %[[shuf:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [1, 2] : vector<2xi64>, vector<2xi64>
-! CHECK-FIR: %[[cshuf:.*]] = fir.convert %[[shuf]] : (vector<2xi64>) -> !fir.vector<2:i64>
-! CHECK-FIR: fir.store %[[cshuf]] to %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[shuf:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [1, 2] : vector<2xi64>
-! CHECK-LLVMIR: llvm.store %[[shuf]], %{{.*}} : !llvm.ptr<vector<2xi64>>
-
-! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[shuf:.*]] = shufflevector <2 x i64> %[[arg1]], <2 x i64> %[[arg2]], <2 x i32> <i32 1, i32 2>
-! CHECK: store <2 x i64> %[[shuf]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[shuf:.*]] = shufflevector <2 x i64> %[[arg1]], <2 x i64> %[[arg2]], <2 x i32> <i32 1, i32 2>
+! LLVMIR: store <2 x i64> %[[shuf]], ptr %{{.*}}, align 16
 end subroutine vec_permi_test_i8i2
 
 ! CHECK-LABEL: vec_permi_test_i8i4
@@ -496,23 +192,10 @@ subroutine vec_permi_test_i8i4(arg1, arg2, arg3)
   vector(integer(8)) :: arg1, arg2, r
   r = vec_permi(arg1, arg2, 1_4)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! CHECK-FIR: %[[shuf:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [0, 3] : vector<2xi64>, vector<2xi64>
-! CHECK-FIR: %[[cshuf:.*]] = fir.convert %[[shuf]] : (vector<2xi64>) -> !fir.vector<2:i64>
-! CHECK-FIR: fir.store %[[cshuf]] to %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[shuf:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [0, 3] : vector<2xi64>
-! CHECK-LLVMIR: llvm.store %[[shuf]], %{{.*}} : !llvm.ptr<vector<2xi64>>
-
-! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[shuf:.*]] = shufflevector <2 x i64> %[[arg1]], <2 x i64> %[[arg2]], <2 x i32> <i32 0, i32 3>
-! CHECK: store <2 x i64> %[[shuf]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[shuf:.*]] = shufflevector <2 x i64> %[[arg1]], <2 x i64> %[[arg2]], <2 x i32> <i32 0, i32 3>
+! LLVMIR: store <2 x i64> %[[shuf]], ptr %{{.*}}, align 16
 end subroutine vec_permi_test_i8i4
 
 ! CHECK-LABEL: vec_permi_test_i8i8
@@ -520,23 +203,10 @@ subroutine vec_permi_test_i8i8(arg1, arg2, arg3)
   vector(integer(8)) :: arg1, arg2, r
   r = vec_permi(arg1, arg2, 0_8)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! CHECK-FIR: %[[shuf:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [0, 2] : vector<2xi64>, vector<2xi64>
-! CHECK-FIR: %[[cshuf:.*]] = fir.convert %[[shuf]] : (vector<2xi64>) -> !fir.vector<2:i64>
-! CHECK-FIR: fir.store %[[cshuf]] to %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[shuf:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [0, 2] : vector<2xi64>
-! CHECK-LLVMIR: llvm.store %[[shuf]], %{{.*}} : !llvm.ptr<vector<2xi64>>
-
-! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[shuf:.*]] = shufflevector <2 x i64> %[[arg1]], <2 x i64> %[[arg2]], <2 x i32> <i32 0, i32 2>
-! CHECK: store <2 x i64> %[[shuf]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[shuf:.*]] = shufflevector <2 x i64> %[[arg1]], <2 x i64> %[[arg2]], <2 x i32> <i32 0, i32 2>
+! LLVMIR: store <2 x i64> %[[shuf]], ptr %{{.*}}, align 16
 end subroutine vec_permi_test_i8i8
 
 ! CHECK-LABEL: vec_permi_test_u8i1
@@ -544,23 +214,10 @@ subroutine vec_permi_test_u8i1(arg1, arg2, arg3)
   vector(unsigned(8)) :: arg1, arg2, r
   r = vec_permi(arg1, arg2, 3_1)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %[[shuf:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [1, 3] : vector<2xi64>, vector<2xi64>
-! CHECK-FIR: %[[cshuf:.*]] = fir.convert %[[shuf]] : (vector<2xi64>) -> !fir.vector<2:ui64>
-! CHECK-FIR: fir.store %[[cshuf]] to %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[shuf:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [1, 3] : vector<2xi64>
-! CHECK-LLVMIR: llvm.store %[[shuf]], %{{.*}} : !llvm.ptr<vector<2xi64>>
-
-! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[shuf:.*]] = shufflevector <2 x i64> %[[arg1]], <2 x i64> %[[arg2]], <2 x i32> <i32 1, i32 3>
-! CHECK: store <2 x i64> %[[shuf]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[shuf:.*]] = shufflevector <2 x i64> %[[arg1]], <2 x i64> %[[arg2]], <2 x i32> <i32 1, i32 3>
+! LLVMIR: store <2 x i64> %[[shuf]], ptr %{{.*}}, align 16
 end subroutine vec_permi_test_u8i1
 
 ! CHECK-LABEL: vec_permi_test_u8i2
@@ -568,23 +225,10 @@ subroutine vec_permi_test_u8i2(arg1, arg2, arg3)
   vector(unsigned(8)) :: arg1, arg2, r
   r = vec_permi(arg1, arg2, 2_2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %[[shuf:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [1, 2] : vector<2xi64>, vector<2xi64>
-! CHECK-FIR: %[[cshuf:.*]] = fir.convert %[[shuf]] : (vector<2xi64>) -> !fir.vector<2:ui64>
-! CHECK-FIR: fir.store %[[cshuf]] to %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[shuf:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [1, 2] : vector<2xi64>
-! CHECK-LLVMIR: llvm.store %[[shuf]], %{{.*}} : !llvm.ptr<vector<2xi64>>
-
-! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[shuf:.*]] = shufflevector <2 x i64> %[[arg1]], <2 x i64> %[[arg2]], <2 x i32> <i32 1, i32 2>
-! CHECK: store <2 x i64> %[[shuf]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[shuf:.*]] = shufflevector <2 x i64> %[[arg1]], <2 x i64> %[[arg2]], <2 x i32> <i32 1, i32 2>
+! LLVMIR: store <2 x i64> %[[shuf]], ptr %{{.*}}, align 16
 end subroutine vec_permi_test_u8i2
 
 ! CHECK-LABEL: vec_permi_test_u8i4
@@ -592,23 +236,10 @@ subroutine vec_permi_test_u8i4(arg1, arg2, arg3)
   vector(unsigned(8)) :: arg1, arg2, r
   r = vec_permi(arg1, arg2, 1_4)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %[[shuf:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [0, 3] : vector<2xi64>, vector<2xi64>
-! CHECK-FIR: %[[cshuf:.*]] = fir.convert %[[shuf]] : (vector<2xi64>) -> !fir.vector<2:ui64>
-! CHECK-FIR: fir.store %[[cshuf]] to %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[shuf:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [0, 3] : vector<2xi64>
-! CHECK-LLVMIR: llvm.store %[[shuf]], %{{.*}} : !llvm.ptr<vector<2xi64>>
-
-! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[shuf:.*]] = shufflevector <2 x i64> %[[arg1]], <2 x i64> %[[arg2]], <2 x i32> <i32 0, i32 3>
-! CHECK: store <2 x i64> %[[shuf]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[shuf:.*]] = shufflevector <2 x i64> %[[arg1]], <2 x i64> %[[arg2]], <2 x i32> <i32 0, i32 3>
+! LLVMIR: store <2 x i64> %[[shuf]], ptr %{{.*}}, align 16
 end subroutine vec_permi_test_u8i4
 
 ! CHECK-LABEL: vec_permi_test_u8i8
@@ -616,23 +247,10 @@ subroutine vec_permi_test_u8i8(arg1, arg2, arg3)
   vector(unsigned(8)) :: arg1, arg2, r
   r = vec_permi(arg1, arg2, 0_8)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %[[shuf:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [0, 2] : vector<2xi64>, vector<2xi64>
-! CHECK-FIR: %[[cshuf:.*]] = fir.convert %[[shuf]] : (vector<2xi64>) -> !fir.vector<2:ui64>
-! CHECK-FIR: fir.store %[[cshuf]] to %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[shuf:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [0, 2] : vector<2xi64>
-! CHECK-LLVMIR: llvm.store %[[shuf]], %{{.*}} : !llvm.ptr<vector<2xi64>>
-
-! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[shuf:.*]] = shufflevector <2 x i64> %[[arg1]], <2 x i64> %[[arg2]], <2 x i32> <i32 0, i32 2>
-! CHECK: store <2 x i64> %[[shuf]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[shuf:.*]] = shufflevector <2 x i64> %[[arg1]], <2 x i64> %[[arg2]], <2 x i32> <i32 0, i32 2>
+! LLVMIR: store <2 x i64> %[[shuf]], ptr %{{.*}}, align 16
 end subroutine vec_permi_test_u8i8
 
 ! CHECK-LABEL: vec_permi_test_r4i1
@@ -640,32 +258,13 @@ subroutine vec_permi_test_r4i1(arg1, arg2, arg3)
   vector(real(4)) :: arg1, arg2, r
   r = vec_permi(arg1, arg2, 3_1)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<4xf32> to vector<2xf64>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<4xf32> to vector<2xf64>
-! CHECK-FIR: %[[shuf:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [1, 3] : vector<2xf64>, vector<2xf64>
-! CHECK-FIR: %[[bshuf:.*]] = llvm.bitcast %[[shuf]] : vector<2xf64> to vector<4xf32>
-! CHECK-FIR: %[[cshuf:.*]] = fir.convert %[[bshuf]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! CHECK-FIR: fir.store %[[cshuf]] to %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[barg1:.*]] = llvm.bitcast %[[arg1]] : vector<4xf32> to vector<2xf64>
-! CHECK-LLVMIR: %[[barg2:.*]] = llvm.bitcast %[[arg2]] : vector<4xf32> to vector<2xf64>
-! CHECK-LLVMIR: %[[shuf:.*]] = llvm.shufflevector %[[barg1]], %[[barg2]] [1, 3] : vector<2xf64>
-! CHECK-LLVMIR: %[[bshuf:.*]] = llvm.bitcast %[[shuf]] : vector<2xf64> to vector<4xf32>
-! CHECK-LLVMIR: llvm.store %[[bshuf]], %{{.*}} : !llvm.ptr<vector<4xf32>>
-
-! CHECK: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <4 x float> %[[arg1]] to <2 x double>
-! CHECK: %[[barg2:.*]] = bitcast <4 x float> %[[arg2]] to <2 x double>
-! CHECK: %[[shuf:.*]] = shufflevector <2 x double> %[[barg1]], <2 x double> %[[barg2]], <2 x i32> <i32 1, i32 3>
-! CHECK: %[[bshuf:.*]] = bitcast <2 x double> %[[shuf]] to <4 x float>
-! CHECK: store <4 x float> %[[bshuf]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <4 x float> %[[arg1]] to <2 x double>
+! LLVMIR: %[[barg2:.*]] = bitcast <4 x float> %[[arg2]] to <2 x double>
+! LLVMIR: %[[shuf:.*]] = shufflevector <2 x double> %[[barg1]], <2 x double> %[[barg2]], <2 x i32> <i32 1, i32 3>
+! LLVMIR: %[[bshuf:.*]] = bitcast <2 x double> %[[shuf]] to <4 x float>
+! LLVMIR: store <4 x float> %[[bshuf]], ptr %{{.*}}, align 16
 end subroutine vec_permi_test_r4i1
 
 ! CHECK-LABEL: vec_permi_test_r4i2
@@ -673,32 +272,13 @@ subroutine vec_permi_test_r4i2(arg1, arg2, arg3)
   vector(real(4)) :: arg1, arg2, r
   r = vec_permi(arg1, arg2, 2_2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<4xf32> to vector<2xf64>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<4xf32> to vector<2xf64>
-! CHECK-FIR: %[[shuf:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [1, 2] : vector<2xf64>, vector<2xf64>
-! CHECK-FIR: %[[bshuf:.*]] = llvm.bitcast %[[shuf]] : vector<2xf64> to vector<4xf32>
-! CHECK-FIR: %[[cshuf:.*]] = fir.convert %[[bshuf]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! CHECK-FIR: fir.store %[[cshuf]] to %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[barg1:.*]] = llvm.bitcast %[[arg1]] : vector<4xf32> to vector<2xf64>
-! CHECK-LLVMIR: %[[barg2:.*]] = llvm.bitcast %[[arg2]] : vector<4xf32> to vector<2xf64>
-! CHECK-LLVMIR: %[[shuf:.*]] = llvm.shufflevector %[[barg1]], %[[barg2]] [1, 2] : vector<2xf64>
-! CHECK-LLVMIR: %[[bshuf:.*]] = llvm.bitcast %[[shuf]] : vector<2xf64> to vector<4xf32>
-! CHECK-LLVMIR: llvm.store %[[bshuf]], %{{.*}} : !llvm.ptr<vector<4xf32>>
-
-! CHECK: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <4 x float> %[[arg1]] to <2 x double>
-! CHECK: %[[barg2:.*]] = bitcast <4 x float> %[[arg2]] to <2 x double>
-! CHECK: %[[shuf:.*]] = shufflevector <2 x double> %[[barg1]], <2 x double> %[[barg2]], <2 x i32> <i32 1, i32 2>
-! CHECK: %[[bshuf:.*]] = bitcast <2 x double> %[[shuf]] to <4 x float>
-! CHECK: store <4 x float> %[[bshuf]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <4 x float> %[[arg1]] to <2 x double>
+! LLVMIR: %[[barg2:.*]] = bitcast <4 x float> %[[arg2]] to <2 x double>
+! LLVMIR: %[[shuf:.*]] = shufflevector <2 x double> %[[barg1]], <2 x double> %[[barg2]], <2 x i32> <i32 1, i32 2>
+! LLVMIR: %[[bshuf:.*]] = bitcast <2 x double> %[[shuf]] to <4 x float>
+! LLVMIR: store <4 x float> %[[bshuf]], ptr %{{.*}}, align 16
 end subroutine vec_permi_test_r4i2
 
 ! CHECK-LABEL: vec_permi_test_r4i4
@@ -706,32 +286,13 @@ subroutine vec_permi_test_r4i4(arg1, arg2, arg3)
   vector(real(4)) :: arg1, arg2, r
   r = vec_permi(arg1, arg2, 1_4)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<4xf32> to vector<2xf64>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<4xf32> to vector<2xf64>
-! CHECK-FIR: %[[shuf:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [0, 3] : vector<2xf64>, vector<2xf64>
-! CHECK-FIR: %[[bshuf:.*]] = llvm.bitcast %[[shuf]] : vector<2xf64> to vector<4xf32>
-! CHECK-FIR: %[[cshuf:.*]] = fir.convert %[[bshuf]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! CHECK-FIR: fir.store %[[cshuf]] to %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[barg1:.*]] = llvm.bitcast %[[arg1]] : vector<4xf32> to vector<2xf64>
-! CHECK-LLVMIR: %[[barg2:.*]] = llvm.bitcast %[[arg2]] : vector<4xf32> to vector<2xf64>
-! CHECK-LLVMIR: %[[shuf:.*]] = llvm.shufflevector %[[barg1]], %[[barg2]] [0, 3] : vector<2xf64>
-! CHECK-LLVMIR: %[[bshuf:.*]] = llvm.bitcast %[[shuf]] : vector<2xf64> to vector<4xf32>
-! CHECK-LLVMIR: llvm.store %[[bshuf]], %{{.*}} : !llvm.ptr<vector<4xf32>>
-
-! CHECK: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <4 x float> %[[arg1]] to <2 x double>
-! CHECK: %[[barg2:.*]] = bitcast <4 x float> %[[arg2]] to <2 x double>
-! CHECK: %[[shuf:.*]] = shufflevector <2 x double> %[[barg1]], <2 x double> %[[barg2]], <2 x i32> <i32 0, i32 3>
-! CHECK: %[[bshuf:.*]] = bitcast <2 x double> %[[shuf]] to <4 x float>
-! CHECK: store <4 x float> %[[bshuf]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <4 x float> %[[arg1]] to <2 x double>
+! LLVMIR: %[[barg2:.*]] = bitcast <4 x float> %[[arg2]] to <2 x double>
+! LLVMIR: %[[shuf:.*]] = shufflevector <2 x double> %[[barg1]], <2 x double> %[[barg2]], <2 x i32> <i32 0, i32 3>
+! LLVMIR: %[[bshuf:.*]] = bitcast <2 x double> %[[shuf]] to <4 x float>
+! LLVMIR: store <4 x float> %[[bshuf]], ptr %{{.*}}, align 16
 end subroutine vec_permi_test_r4i4
 
 ! CHECK-LABEL: vec_permi_test_r4i8
@@ -739,32 +300,13 @@ subroutine vec_permi_test_r4i8(arg1, arg2, arg3)
   vector(real(4)) :: arg1, arg2, r
   r = vec_permi(arg1, arg2, 0_8)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<4xf32> to vector<2xf64>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<4xf32> to vector<2xf64>
-! CHECK-FIR: %[[shuf:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [0, 2] : vector<2xf64>, vector<2xf64>
-! CHECK-FIR: %[[bshuf:.*]] = llvm.bitcast %[[shuf]] : vector<2xf64> to vector<4xf32>
-! CHECK-FIR: %[[cshuf:.*]] = fir.convert %[[bshuf]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! CHECK-FIR: fir.store %[[cshuf]] to %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[barg1:.*]] = llvm.bitcast %[[arg1]] : vector<4xf32> to vector<2xf64>
-! CHECK-LLVMIR: %[[barg2:.*]] = llvm.bitcast %[[arg2]] : vector<4xf32> to vector<2xf64>
-! CHECK-LLVMIR: %[[shuf:.*]] = llvm.shufflevector %[[barg1]], %[[barg2]] [0, 2] : vector<2xf64>
-! CHECK-LLVMIR: %[[bshuf:.*]] = llvm.bitcast %[[shuf]] : vector<2xf64> to vector<4xf32>
-! CHECK-LLVMIR: llvm.store %[[bshuf]], %{{.*}} : !llvm.ptr<vector<4xf32>>
-
-! CHECK: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <4 x float> %[[arg1]] to <2 x double>
-! CHECK: %[[barg2:.*]] = bitcast <4 x float> %[[arg2]] to <2 x double>
-! CHECK: %[[shuf:.*]] = shufflevector <2 x double> %[[barg1]], <2 x double> %[[barg2]], <2 x i32> <i32 0, i32 2>
-! CHECK: %[[bshuf:.*]] = bitcast <2 x double> %[[shuf]] to <4 x float>
-! CHECK: store <4 x float> %[[bshuf]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <4 x float> %[[arg1]] to <2 x double>
+! LLVMIR: %[[barg2:.*]] = bitcast <4 x float> %[[arg2]] to <2 x double>
+! LLVMIR: %[[shuf:.*]] = shufflevector <2 x double> %[[barg1]], <2 x double> %[[barg2]], <2 x i32> <i32 0, i32 2>
+! LLVMIR: %[[bshuf:.*]] = bitcast <2 x double> %[[shuf]] to <4 x float>
+! LLVMIR: store <4 x float> %[[bshuf]], ptr %{{.*}}, align 16
 end subroutine vec_permi_test_r4i8
 
 ! CHECK-LABEL: vec_permi_test_r8i1
@@ -772,25 +314,10 @@ subroutine vec_permi_test_r8i1(arg1, arg2, arg3)
   vector(real(8)) :: arg1, arg2, r
   r = vec_permi(arg1, arg2, 3_1)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<2xf64> to vector<2xf64>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<2xf64> to vector<2xf64>
-! CHECK-FIR: %[[shuf:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [1, 3] : vector<2xf64>, vector<2xf64>
-! CHECK-FIR: %[[cshuf:.*]] = fir.convert %[[shuf]] : (vector<2xf64>) -> !fir.vector<2:f64>
-! CHECK-FIR: fir.store %[[cshuf]] to %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %[[shuf:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [1, 3] : vector<2xf64>
-! CHECK-LLVMIR: llvm.store %[[shuf]], %{{.*}} : !llvm.ptr<vector<2xf64>>
-
-! CHECK: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16
-! CHECK: %[[shuf:.*]] = shufflevector <2 x double> %[[arg1]], <2 x double> %[[arg2]], <2 x i32> <i32 1, i32 3>
-! CHECK: store <2 x double> %[[shuf]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! LLVMIR: %[[shuf:.*]] = shufflevector <2 x double> %[[arg1]], <2 x double> %[[arg2]], <2 x i32> <i32 1, i32 3>
+! LLVMIR: store <2 x double> %[[shuf]], ptr %{{.*}}, align 16
 end subroutine vec_permi_test_r8i1
 
 ! CHECK-LABEL: vec_permi_test_r8i2
@@ -798,25 +325,10 @@ subroutine vec_permi_test_r8i2(arg1, arg2, arg3)
   vector(real(8)) :: arg1, arg2, r
   r = vec_permi(arg1, arg2, 2_2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<2xf64> to vector<2xf64>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<2xf64> to vector<2xf64>
-! CHECK-FIR: %[[shuf:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [1, 2] : vector<2xf64>, vector<2xf64>
-! CHECK-FIR: %[[cshuf:.*]] = fir.convert %[[shuf]] : (vector<2xf64>) -> !fir.vector<2:f64>
-! CHECK-FIR: fir.store %[[cshuf]] to %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %[[shuf:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [1, 2] : vector<2xf64>
-! CHECK-LLVMIR: llvm.store %[[shuf]], %{{.*}} : !llvm.ptr<vector<2xf64>>
-
-! CHECK: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16
-! CHECK: %[[shuf:.*]] = shufflevector <2 x double> %[[arg1]], <2 x double> %[[arg2]], <2 x i32> <i32 1, i32 2>
-! CHECK: store <2 x double> %[[shuf]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! LLVMIR: %[[shuf:.*]] = shufflevector <2 x double> %[[arg1]], <2 x double> %[[arg2]], <2 x i32> <i32 1, i32 2>
+! LLVMIR: store <2 x double> %[[shuf]], ptr %{{.*}}, align 16
 end subroutine vec_permi_test_r8i2
 
 ! CHECK-LABEL: vec_permi_test_r8i4
@@ -824,25 +336,10 @@ subroutine vec_permi_test_r8i4(arg1, arg2, arg3)
   vector(real(8)) :: arg1, arg2, r
   r = vec_permi(arg1, arg2, 1_4)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<2xf64> to vector<2xf64>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<2xf64> to vector<2xf64>
-! CHECK-FIR: %[[shuf:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [0, 3] : vector<2xf64>, vector<2xf64>
-! CHECK-FIR: %[[cshuf:.*]] = fir.convert %[[shuf]] : (vector<2xf64>) -> !fir.vector<2:f64>
-! CHECK-FIR: fir.store %[[cshuf]] to %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %[[shuf:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [0, 3] : vector<2xf64>
-! CHECK-LLVMIR: llvm.store %[[shuf]], %{{.*}} : !llvm.ptr<vector<2xf64>>
-
-! CHECK: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16
-! CHECK: %[[shuf:.*]] = shufflevector <2 x double> %[[arg1]], <2 x double> %[[arg2]], <2 x i32> <i32 0, i32 3>
-! CHECK: store <2 x double> %[[shuf]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! LLVMIR: %[[shuf:.*]] = shufflevector <2 x double> %[[arg1]], <2 x double> %[[arg2]], <2 x i32> <i32 0, i32 3>
+! LLVMIR: store <2 x double> %[[shuf]], ptr %{{.*}}, align 16
 end subroutine vec_permi_test_r8i4
 
 ! CHECK-LABEL: vec_permi_test_r8i8
@@ -850,23 +347,8 @@ subroutine vec_permi_test_r8i8(arg1, arg2, arg3)
   vector(real(8)) :: arg1, arg2, r
   r = vec_permi(arg1, arg2, 0_8)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<2xf64> to vector<2xf64>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<2xf64> to vector<2xf64>
-! CHECK-FIR: %[[shuf:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [0, 2] : vector<2xf64>, vector<2xf64>
-! CHECK-FIR: %[[cshuf:.*]] = fir.convert %[[shuf]] : (vector<2xf64>) -> !fir.vector<2:f64>
-! CHECK-FIR: fir.store %[[cshuf]] to %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %[[shuf:.*]] = llvm.shufflevector %[[arg1]], %[[arg2]] [0, 2] : vector<2xf64>
-! CHECK-LLVMIR: llvm.store %[[shuf]], %{{.*}} : !llvm.ptr<vector<2xf64>>
-
-! CHECK: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16
-! CHECK: %[[shuf:.*]] = shufflevector <2 x double> %[[arg1]], <2 x double> %[[arg2]], <2 x i32> <i32 0, i32 2>
-! CHECK: store <2 x double> %[[shuf]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! LLVMIR: %[[shuf:.*]] = shufflevector <2 x double> %[[arg1]], <2 x double> %[[arg2]], <2 x i32> <i32 0, i32 2>
+! LLVMIR: store <2 x double> %[[shuf]], ptr %{{.*}}, align 16
 end subroutine vec_permi_test_r8i8
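A minimal scalar sketch of the semantics these checks encode (a hypothetical helper, not part of the patch): on little-endian targets, vec_perm lowers to llvm.ppc.altivec.vperm with the operands swapped and the permute mask complemented (the xor with all-ones above), because vperm indexes its 32-byte table in big-endian byte order. For vec_permi, the two-bit constant m maps to shuffle indices [m/2, 2 + mod(m, 2)], matching the <1,3>, <1,2>, <0,3>, <0,2> masks above.

subroutine demo_perm_model(a, b, m, r)
  ! Little-endian view of vec_perm: byte i of r is the 32-byte
  ! concatenation [a, b] indexed by the low five bits of m(i).
  integer(1), intent(in)  :: a(16), b(16), m(16)
  integer(1), intent(out) :: r(16)
  integer(1) :: tab(32)
  integer :: i
  tab(1:16)  = a
  tab(17:32) = b
  do i = 1, 16
    r(i) = tab(1 + iand(int(m(i)), 31))
  end do
end subroutine demo_perm_model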

diff  --git a/flang/test/Lower/PowerPC/ppc-vec-sel.f90 b/flang/test/Lower/PowerPC/ppc-vec-sel.f90
new file mode 100644
index 000000000000000..c3a7288b6b4d055
--- /dev/null
+++ b/flang/test/Lower/PowerPC/ppc-vec-sel.f90
@@ -0,0 +1,189 @@
+! RUN: %flang_fc1 -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes="LLVMIR" %s
+! REQUIRES: target=powerpc{{.*}}
+
+!----------------------
+! vec_sel
+!----------------------
+
+! CHECK-LABEL: vec_sel_testi1
+subroutine vec_sel_testi1(arg1, arg2, arg3)
+  vector(integer(1)) :: arg1, arg2, r
+  vector(unsigned(1)) :: arg3
+  r = vec_sel(arg1, arg2, arg3)
+
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg3:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR:  %[[comp:.*]] = xor <16 x i8> %[[arg3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+! LLVMIR:  %[[and1:.*]] = and <16 x i8> %[[arg1]], %[[comp]]
+! LLVMIR:  %[[and2:.*]] = and <16 x i8> %[[arg2]], %[[arg3]]
+! LLVMIR:  %{{[0-9]+}} = or <16 x i8> %[[and1]], %[[and2]]
+end subroutine vec_sel_testi1
+
+! CHECK-LABEL: vec_sel_testi2
+subroutine vec_sel_testi2(arg1, arg2, arg3)
+  vector(integer(2)) :: arg1, arg2, r
+  vector(unsigned(2)) :: arg3
+  r = vec_sel(arg1, arg2, arg3)
+
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg3:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[bc1:.*]] = bitcast <8 x i16> %5 to <16 x i8>
+! LLVMIR: %[[bc2:.*]] = bitcast <8 x i16> %6 to <16 x i8>
+! LLVMIR: %[[bc3:.*]] = bitcast <8 x i16> %7 to <16 x i8>
+! LLVMIR: %[[comp:.*]] = xor <16 x i8> %[[bc3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+! LLVMIR: %[[and1:.*]] = and <16 x i8> %[[bc1]], %[[comp]]
+! LLVMIR: %[[and2:.*]] = and <16 x i8> %[[bc2]], %[[bc3]]
+! LLVMIR: %[[or:.*]] = or <16 x i8> %[[and1]], %[[and2]]
+! LLVMIR: %{{[0-9]+}} = bitcast <16 x i8> %[[or]] to <8 x i16>
+end subroutine vec_sel_testi2
+
+! CHECK-LABEL: vec_sel_testi4
+subroutine vec_sel_testi4(arg1, arg2, arg3)
+  vector(integer(4)) :: arg1, arg2, r
+  vector(unsigned(4)) :: arg3
+  r = vec_sel(arg1, arg2, arg3)
+
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg3:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[bc1:.*]] = bitcast <4 x i32> %5 to <16 x i8>
+! LLVMIR: %[[bc2:.*]] = bitcast <4 x i32> %6 to <16 x i8>
+! LLVMIR: %[[bc3:.*]] = bitcast <4 x i32> %7 to <16 x i8>
+! LLVMIR: %[[comp:.*]] = xor <16 x i8> %[[bc3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+! LLVMIR: %[[and1:.*]] = and <16 x i8> %[[bc1]], %[[comp]]
+! LLVMIR: %[[and2:.*]] = and <16 x i8> %[[bc2]], %[[bc3]]
+! LLVMIR: %[[or:.*]] = or <16 x i8> %[[and1]], %[[and2]]
+! LLVMIR: %{{[0-9]+}} = bitcast <16 x i8> %[[or]] to <4 x i32>
+end subroutine vec_sel_testi4
+
+! CHECK-LABEL: vec_sel_testi8
+subroutine vec_sel_testi8(arg1, arg2, arg3)
+  vector(integer(8)) :: arg1, arg2, r
+  vector(unsigned(8)) :: arg3
+  r = vec_sel(arg1, arg2, arg3)
+
+! LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg3:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[bc1:.*]] = bitcast <2 x i64> %5 to <16 x i8>
+! LLVMIR: %[[bc2:.*]] = bitcast <2 x i64> %6 to <16 x i8>
+! LLVMIR: %[[bc3:.*]] = bitcast <2 x i64> %7 to <16 x i8>
+! LLVMIR: %[[comp:.*]] = xor <16 x i8> %[[bc3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+! LLVMIR: %[[and1:.*]] = and <16 x i8> %[[bc1]], %[[comp]]
+! LLVMIR: %[[and2:.*]] = and <16 x i8> %[[bc2]], %[[bc3]]
+! LLVMIR: %[[or:.*]] = or <16 x i8> %[[and1]], %[[and2]]
+! LLVMIR: %{{[0-9]+}} = bitcast <16 x i8> %[[or]] to <2 x i64>
+end subroutine vec_sel_testi8
+
+! CHECK-LABEL: vec_sel_testu1
+subroutine vec_sel_testu1(arg1, arg2, arg3)
+  vector(unsigned(1)) :: arg1, arg2, r
+  vector(unsigned(1)) :: arg3
+  r = vec_sel(arg1, arg2, arg3)
+
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg3:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR:  %[[comp:.*]] = xor <16 x i8> %[[arg3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+! LLVMIR:  %[[and1:.*]] = and <16 x i8> %[[arg1]], %[[comp]]
+! LLVMIR:  %[[and2:.*]] = and <16 x i8> %[[arg2]], %[[arg3]]
+! LLVMIR:  %{{[0-9]+}} = or <16 x i8> %[[and1]], %[[and2]]
+end subroutine vec_sel_testu1
+
+! CHECK-LABEL: vec_sel_testu2
+subroutine vec_sel_testu2(arg1, arg2, arg3)
+  vector(unsigned(2)) :: arg1, arg2, r
+  vector(unsigned(2)) :: arg3
+  r = vec_sel(arg1, arg2, arg3)
+
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg3:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[bc1:.*]] = bitcast <8 x i16> %5 to <16 x i8>
+! LLVMIR: %[[bc2:.*]] = bitcast <8 x i16> %6 to <16 x i8>
+! LLVMIR: %[[bc3:.*]] = bitcast <8 x i16> %7 to <16 x i8>
+! LLVMIR: %[[comp:.*]] = xor <16 x i8> %[[bc3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+! LLVMIR: %[[and1:.*]] = and <16 x i8> %[[bc1]], %[[comp]]
+! LLVMIR: %[[and2:.*]] = and <16 x i8> %[[bc2]], %[[bc3]]
+! LLVMIR: %[[or:.*]] = or <16 x i8> %[[and1]], %[[and2]]
+! LLVMIR: %{{[0-9]+}} = bitcast <16 x i8> %[[or]] to <8 x i16>
+end subroutine vec_sel_testu2
+
+! CHECK-LABEL: vec_sel_testu4
+subroutine vec_sel_testu4(arg1, arg2, arg3)
+  vector(unsigned(4)) :: arg1, arg2, r
+  vector(unsigned(4)) :: arg3
+  r = vec_sel(arg1, arg2, arg3)
+
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg3:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[bc1:.*]] = bitcast <4 x i32> %5 to <16 x i8>
+! LLVMIR: %[[bc2:.*]] = bitcast <4 x i32> %6 to <16 x i8>
+! LLVMIR: %[[bc3:.*]] = bitcast <4 x i32> %7 to <16 x i8>
+! LLVMIR: %[[comp:.*]] = xor <16 x i8> %[[bc3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+! LLVMIR: %[[and1:.*]] = and <16 x i8> %[[bc1]], %[[comp]]
+! LLVMIR: %[[and2:.*]] = and <16 x i8> %[[bc2]], %[[bc3]]
+! LLVMIR: %[[or:.*]] = or <16 x i8> %[[and1]], %[[and2]]
+! LLVMIR: %{{[0-9]+}} = bitcast <16 x i8> %[[or]] to <4 x i32>
+end subroutine vec_sel_testu4
+
+! CHECK-LABEL: vec_sel_testu8
+subroutine vec_sel_testu8(arg1, arg2, arg3)
+  vector(unsigned(8)) :: arg1, arg2, r
+  vector(unsigned(8)) :: arg3
+  r = vec_sel(arg1, arg2, arg3)
+
+! LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg3:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[bc1:.*]] = bitcast <2 x i64> %5 to <16 x i8>
+! LLVMIR: %[[bc2:.*]] = bitcast <2 x i64> %6 to <16 x i8>
+! LLVMIR: %[[bc3:.*]] = bitcast <2 x i64> %7 to <16 x i8>
+! LLVMIR: %[[comp:.*]] = xor <16 x i8> %[[bc3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+! LLVMIR: %[[and1:.*]] = and <16 x i8> %[[bc1]], %[[comp]]
+! LLVMIR: %[[and2:.*]] = and <16 x i8> %[[bc2]], %[[bc3]]
+! LLVMIR: %[[or:.*]] = or <16 x i8> %[[and1]], %[[and2]]
+! LLVMIR: %{{[0-9]+}} = bitcast <16 x i8> %[[or]] to <2 x i64>
+end subroutine vec_sel_testu8
+
+! CHECK-LABEL: vec_sel_testr4
+subroutine vec_sel_testr4(arg1, arg2, arg3)
+  vector(real(4)) :: arg1, arg2, r
+  vector(unsigned(4)) :: arg3
+  r = vec_sel(arg1, arg2, arg3)
+
+! LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg3:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[bc1:.*]] = bitcast <4 x float> %5 to <16 x i8>
+! LLVMIR: %[[bc2:.*]] = bitcast <4 x float> %6 to <16 x i8>
+! LLVMIR: %[[bc3:.*]] = bitcast <4 x i32> %7 to <16 x i8>
+! LLVMIR: %[[comp:.*]] = xor <16 x i8> %[[bc3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+! LLVMIR: %[[and1:.*]] = and <16 x i8> %[[bc1]], %[[comp]]
+! LLVMIR: %[[and2:.*]] = and <16 x i8> %[[bc2]], %[[bc3]]
+! LLVMIR: %[[or:.*]] = or <16 x i8> %[[and1]], %[[and2]]
+! LLVMIR: %{{[0-9]+}} = bitcast <16 x i8> %[[or]] to <4 x float>
+end subroutine vec_sel_testr4
+
+! CHECK-LABEL: vec_sel_testr8
+subroutine vec_sel_testr8(arg1, arg2, arg3)
+  vector(real(8)) :: arg1, arg2, r
+  vector(unsigned(8)) :: arg3
+  r = vec_sel(arg1, arg2, arg3)
+
+! LLVMIR: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg3:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[bc1:.*]] = bitcast <2 x double> %5 to <16 x i8>
+! LLVMIR: %[[bc2:.*]] = bitcast <2 x double> %6 to <16 x i8>
+! LLVMIR: %[[bc3:.*]] = bitcast <2 x i64> %7 to <16 x i8>
+! LLVMIR: %[[comp:.*]] = xor <16 x i8> %[[bc3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+! LLVMIR: %[[and1:.*]] = and <16 x i8> %[[bc1]], %[[comp]]
+! LLVMIR: %[[and2:.*]] = and <16 x i8> %[[bc2]], %[[bc3]]
+! LLVMIR: %[[or:.*]] = or <16 x i8> %[[and1]], %[[and2]]
+! LLVMIR: %{{[0-9]+}} = bitcast <16 x i8> %[[or]] to <2 x double>
+end subroutine vec_sel_testr8
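A hypothetical scalar model of the per-element pattern the vec_sel checks above verify (not part of the patch): the xor with all-ones complements the mask, and the and/or blend then takes bits of arg1 where the mask is 0 and bits of arg2 where it is 1.

subroutine demo_sel_model(a, b, m, r)
  ! r = (a .and. .not. m) .or. (b .and. m) bitwise; ieor(m, -1_1)
  ! mirrors the xor with all-ones in the IR checked above.
  integer(1), intent(in)  :: a, b, m
  integer(1), intent(out) :: r
  r = ior(iand(a, ieor(m, -1_1)), iand(b, m))
end subroutine demo_sel_model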

diff  --git a/flang/test/Lower/PowerPC/ppc-vec-shift-be-le.f90 b/flang/test/Lower/PowerPC/ppc-vec-shift-be-le.f90
index fa8c6a1f725f22c..bd83f28b4eeb522 100644
--- a/flang/test/Lower/PowerPC/ppc-vec-shift-be-le.f90
+++ b/flang/test/Lower/PowerPC/ppc-vec-shift-be-le.f90
@@ -1,8 +1,6 @@
-! RUN: %flang_fc1 -emit-fir %s -triple ppc64le-unknown-linux -o - | FileCheck --check-prefixes="CHECK-FIR" %s
-! RUN: %flang_fc1 -emit-llvm %s -triple ppc64le-unknown-linux -o - | FileCheck --check-prefixes="CHECK" %s
+! RUN: %flang_fc1 -flang-experimental-hlfir -emit-llvm %s -triple ppc64le-unknown-linux -o - | FileCheck --check-prefixes="LLVMIR" %s
 !
-! RUN: %flang_fc1 -emit-fir %s -triple ppc64-unknown-aix -o - | FileCheck --check-prefixes="BE-FIR" %s
-! RUN: %flang_fc1 -emit-llvm %s -triple ppc64-unknown-aix -o - | FileCheck --check-prefixes="BE-IR" %s
+! RUN: %flang_fc1 -flang-experimental-hlfir -emit-llvm %s -triple ppc64-unknown-aix -o - | FileCheck --check-prefixes="BE-LLVMIR" %s
 ! REQUIRES: target=powerpc{{.*}}
 
 !----------------------
@@ -14,64 +12,31 @@ subroutine vec_sld_test_i1i1(arg1, arg2)
   vector(integer(1)) :: arg1, arg2, r
   r = vec_sld(arg1, arg2, 3_1)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[carg2]], %[[carg1]] [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<16xi8>) -> !fir.vector<16:i8>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[arg2]], <16 x i8> %[[arg1]], <16 x i32> <i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28>
-! CHECK: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<16xi8>) -> !fir.vector<16:i8>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-
-! BE-IR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[arg1]], <16 x i8> %[[arg2]], <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
-! BE-IR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
-end subroutine vec_sld_test_i1i1
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[arg2]], <16 x i8> %[[arg1]], <16 x i32> <i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28>
+! LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
 
+! BE-LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[arg1]], <16 x i8> %[[arg2]], <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
+! BE-LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
+end subroutine vec_sld_test_i1i1
 
 ! CHECK-LABEL: vec_sld_test_i1i2
 subroutine vec_sld_test_i1i2(arg1, arg2)
   vector(integer(1)) :: arg1, arg2, r
   r = vec_sld(arg1, arg2, 3_2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[carg2]], %[[carg1]] [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<16xi8>) -> !fir.vector<16:i8>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[arg2]], <16 x i8> %[[arg1]], <16 x i32> <i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28>
-! CHECK: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<16xi8>) -> !fir.vector<16:i8>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-
-! BE-IR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[arg1]], <16 x i8> %[[arg2]], <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
-! BE-IR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[arg2]], <16 x i8> %[[arg1]], <16 x i32> <i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28>
+! LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[arg1]], <16 x i8> %[[arg2]], <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
+! BE-LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_sld_test_i1i2
 
 ! CHECK-LABEL: vec_sld_test_i1i4
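
The shuffle masks in the vec_sld checks below all follow one pattern: for a
byte shift s (taken modulo 16), the little-endian path selects bytes 16-s
through 31-s of the concatenation (arg2, arg1), while the big-endian path
selects bytes s through s+15 of (arg1, arg2). A small sketch (illustrative
only, not part of the patch) that reproduces the index vectors checked above
for s = 3:

    program sld_indices
      implicit none
      integer :: s, i
      s = 3
      ! Little endian: shufflevector(arg2, arg1) with indices 16-s .. 31-s
      print '(a,16i3)', 'LE:', (16 - s + i, i = 0, 15)   ! 13 14 ... 28
      ! Big endian: shufflevector(arg1, arg2) with indices s .. s+15
      print '(a,16i3)', 'BE:', (s + i, i = 0, 15)        !  3  4 ... 18
    end program sld_indices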
@@ -79,31 +44,15 @@ subroutine vec_sld_test_i1i4(arg1, arg2)
   vector(integer(1)) :: arg1, arg2, r
   r = vec_sld(arg1, arg2, 3_4)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[carg2]], %[[carg1]] [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<16xi8>) -> !fir.vector<16:i8>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[arg2]], <16 x i8> %[[arg1]], <16 x i32> <i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28>
-! CHECK: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<16xi8>) -> !fir.vector<16:i8>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-
-! BE-IR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[arg1]], <16 x i8> %[[arg2]], <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
-! BE-IR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[arg2]], <16 x i8> %[[arg1]], <16 x i32> <i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28>
+! LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[arg1]], <16 x i8> %[[arg2]], <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
+! BE-LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_sld_test_i1i4
 
 ! CHECK-LABEL: vec_sld_test_i1i8
@@ -111,31 +60,15 @@ subroutine vec_sld_test_i1i8(arg1, arg2)
   vector(integer(1)) :: arg1, arg2, r
   r = vec_sld(arg1, arg2, 3_8)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[carg2]], %[[carg1]] [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<16xi8>) -> !fir.vector<16:i8>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[arg2]], <16 x i8> %[[arg1]], <16 x i32> <i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28>
-! CHECK: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<16xi8>) -> !fir.vector<16:i8>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-
-! BE-IR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[arg1]], <16 x i8> %[[arg2]], <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
-! BE-IR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[arg2]], <16 x i8> %[[arg1]], <16 x i32> <i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28>
+! LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[arg1]], <16 x i8> %[[arg2]], <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
+! BE-LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_sld_test_i1i8
 
 ! CHECK-LABEL: vec_sld_test_i2i1
@@ -143,43 +76,21 @@ subroutine vec_sld_test_i2i1(arg1, arg2)
   vector(integer(2)) :: arg1, arg2, r
   r = vec_sld(arg1, arg2, 3_1)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<8xi16> to vector<16xi8>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<8xi16> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[barg2]], %[[barg1]] [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<8xi16>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<8xi16>) -> !fir.vector<8:i16>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
-! CHECK: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28>
-! CHECK: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
-! CHECK: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! BE-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<8xi16> to vector<16xi8>
-! BE-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<8xi16> to vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<8xi16>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<8xi16>) -> !fir.vector<8:i16>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-
-! BE-IR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! BE-IR: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
-! BE-IR: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
-! BE-IR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
-! BE-IR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
+! LLVMIR: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28>
+! LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
+! LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
+! BE-LLVMIR: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
+! BE-LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
+! BE-LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
 end subroutine vec_sld_test_i2i1
 
 ! CHECK-LABEL: vec_sld_test_i2i2
@@ -187,43 +98,21 @@ subroutine vec_sld_test_i2i2(arg1, arg2)
   vector(integer(2)) :: arg1, arg2, r
   r = vec_sld(arg1, arg2, 8_2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<8xi16> to vector<16xi8>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<8xi16> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[barg2]], %[[barg1]] [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<8xi16>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<8xi16>) -> !fir.vector<8:i16>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
-! CHECK: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
-! CHECK: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
-! CHECK: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! BE-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<8xi16> to vector<16xi8>
-! BE-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<8xi16> to vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<8xi16>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<8xi16>) -> !fir.vector<8:i16>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-
-! BE-IR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! BE-IR: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
-! BE-IR: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
-! BE-IR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
-! BE-IR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
+! LLVMIR: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+! LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
+! LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
+! BE-LLVMIR: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+! BE-LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
+! BE-LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
 end subroutine vec_sld_test_i2i2
 
 ! CHECK-LABEL: vec_sld_test_i2i4
@@ -231,43 +120,21 @@ subroutine vec_sld_test_i2i4(arg1, arg2)
   vector(integer(2)) :: arg1, arg2, r
   r = vec_sld(arg1, arg2, 3_4)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<8xi16> to vector<16xi8>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<8xi16> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[barg2]], %[[barg1]] [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<8xi16>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<8xi16>) -> !fir.vector<8:i16>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
-! CHECK: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28>
-! CHECK: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
-! CHECK: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! BE-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<8xi16> to vector<16xi8>
-! BE-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<8xi16> to vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<8xi16>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<8xi16>) -> !fir.vector<8:i16>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-
-! BE-IR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! BE-IR: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
-! BE-IR: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
-! BE-IR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
-! BE-IR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
+! LLVMIR: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28>
+! LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
+! LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
+! BE-LLVMIR: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
+! BE-LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
+! BE-LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
 end subroutine vec_sld_test_i2i4
 
 ! CHECK-LABEL: vec_sld_test_i2i8
@@ -275,43 +142,21 @@ subroutine vec_sld_test_i2i8(arg1, arg2)
   vector(integer(2)) :: arg1, arg2, r
   r = vec_sld(arg1, arg2, 11_8)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<8xi16> to vector<16xi8>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<8xi16> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[barg2]], %[[barg1]] [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<8xi16>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<8xi16>) -> !fir.vector<8:i16>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
-! CHECK: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
-! CHECK: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
-! CHECK: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! BE-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<8xi16> to vector<16xi8>
-! BE-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<8xi16> to vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<8xi16>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<8xi16>) -> !fir.vector<8:i16>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-
-! BE-IR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! BE-IR: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
-! BE-IR: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
-! BE-IR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
-! BE-IR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
+! LLVMIR: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
+! LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
+! LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
+! BE-LLVMIR: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
+! BE-LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
+! BE-LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
 end subroutine vec_sld_test_i2i8
 
 ! CHECK-LABEL: vec_sld_test_i4i1
@@ -319,43 +164,21 @@ subroutine vec_sld_test_i4i1(arg1, arg2)
   vector(integer(4)) :: arg1, arg2, r
   r = vec_sld(arg1, arg2, 3_1)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[barg2]], %[[barg1]] [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
-! CHECK: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28>
-! CHECK: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
-! CHECK: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! BE-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<4xi32> to vector<16xi8>
-! BE-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<4xi32> to vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<4xi32>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-
-! BE-IR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! BE-IR: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
-! BE-IR: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
-! BE-IR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
-! BE-IR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
+! LLVMIR: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28>
+! LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
+! LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
+! BE-LLVMIR: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
+! BE-LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
+! BE-LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
 end subroutine vec_sld_test_i4i1
 
 ! CHECK-LABEL: vec_sld_test_i4i2
@@ -363,43 +186,21 @@ subroutine vec_sld_test_i4i2(arg1, arg2)
   vector(integer(4)) :: arg1, arg2, r
   r = vec_sld(arg1, arg2, 3_2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[barg2]], %[[barg1]] [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
-! CHECK: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28>
-! CHECK: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
-! CHECK: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! BE-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<4xi32> to vector<16xi8>
-! BE-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<4xi32> to vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<4xi32>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-
-! BE-IR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! BE-IR: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
-! BE-IR: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
-! BE-IR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
-! BE-IR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
+! LLVMIR: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28>
+! LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
+! LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
+! BE-LLVMIR: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
+! BE-LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
+! BE-LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
 end subroutine vec_sld_test_i4i2
 
 ! CHECK-LABEL: vec_sld_test_i4i4
@@ -407,43 +208,21 @@ subroutine vec_sld_test_i4i4(arg1, arg2)
   vector(integer(4)) :: arg1, arg2, r
   r = vec_sld(arg1, arg2, 3_4)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[barg2]], %[[barg1]] [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
-! CHECK: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28>
-! CHECK: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
-! CHECK: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! BE-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<4xi32> to vector<16xi8>
-! BE-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<4xi32> to vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<4xi32>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-
-! BE-IR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! BE-IR: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
-! BE-IR: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
-! BE-IR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
-! BE-IR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
+! LLVMIR: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28>
+! LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
+! LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
+! BE-LLVMIR: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
+! BE-LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
+! BE-LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
 end subroutine vec_sld_test_i4i4
 
 ! CHECK-LABEL: vec_sld_test_i4i8
@@ -451,43 +230,21 @@ subroutine vec_sld_test_i4i8(arg1, arg2)
   vector(integer(4)) :: arg1, arg2, r
   r = vec_sld(arg1, arg2, 3_8)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[barg2]], %[[barg1]] [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
-! CHECK: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28>
-! CHECK: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
-! CHECK: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! BE-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<4xi32> to vector<16xi8>
-! BE-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<4xi32> to vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<4xi32>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-
-! BE-IR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! BE-IR: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
-! BE-IR: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
-! BE-IR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
-! BE-IR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
+! LLVMIR: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28>
+! LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
+! LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
+! BE-LLVMIR: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
+! BE-LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
+! BE-LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
 end subroutine vec_sld_test_i4i8
 
 ! CHECK-LABEL: vec_sld_test_u1i1
@@ -495,31 +252,15 @@ subroutine vec_sld_test_u1i1(arg1, arg2)
   vector(unsigned(1)) :: arg1, arg2, r
   r = vec_sld(arg1, arg2, 3_1)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[carg2]], %[[carg1]] [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[arg2]], <16 x i8> %[[arg1]], <16 x i32> <i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28>
-! CHECK: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-
-! BE-IR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[arg1]], <16 x i8> %[[arg2]], <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
-! BE-IR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[arg2]], <16 x i8> %[[arg1]], <16 x i32> <i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28>
+! LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[arg1]], <16 x i8> %[[arg2]], <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
+! BE-LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_sld_test_u1i1
 
 ! CHECK-LABEL: vec_sld_test_u1i2
@@ -527,31 +268,15 @@ subroutine vec_sld_test_u1i2(arg1, arg2)
   vector(unsigned(1)) :: arg1, arg2, r
   r = vec_sld(arg1, arg2, 3_2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[carg2]], %[[carg1]] [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[arg2]], <16 x i8> %[[arg1]], <16 x i32> <i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28>
-! CHECK: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-
-! BE-IR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[arg1]], <16 x i8> %[[arg2]], <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
-! BE-IR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[arg2]], <16 x i8> %[[arg1]], <16 x i32> <i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28>
+! LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[arg1]], <16 x i8> %[[arg2]], <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
+! BE-LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_sld_test_u1i2
 
 ! CHECK-LABEL: vec_sld_test_u1i4
@@ -559,31 +284,15 @@ subroutine vec_sld_test_u1i4(arg1, arg2)
   vector(unsigned(1)) :: arg1, arg2, r
   r = vec_sld(arg1, arg2, 3_1)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[carg2]], %[[carg1]] [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[arg2]], <16 x i8> %[[arg1]], <16 x i32> <i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28>
-! CHECK: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-
-! BE-IR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[arg1]], <16 x i8> %[[arg2]], <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
-! BE-IR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[arg2]], <16 x i8> %[[arg1]], <16 x i32> <i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28>
+! LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[arg1]], <16 x i8> %[[arg2]], <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
+! BE-LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_sld_test_u1i4
 
 ! CHECK-LABEL: vec_sld_test_u1i8
@@ -591,31 +300,15 @@ subroutine vec_sld_test_u1i8(arg1, arg2)
   vector(unsigned(1)) :: arg1, arg2, r
   r = vec_sld(arg1, arg2, 3_1)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[carg2]], %[[carg1]] [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[arg2]], <16 x i8> %[[arg1]], <16 x i32> <i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28>
-! CHECK: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-
-! BE-IR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[arg1]], <16 x i8> %[[arg2]], <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
-! BE-IR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[arg2]], <16 x i8> %[[arg1]], <16 x i32> <i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28>
+! LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[arg1]], <16 x i8> %[[arg2]], <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
+! BE-LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_sld_test_u1i8
 
 ! CHECK-LABEL: vec_sld_test_u2i1
@@ -623,43 +316,21 @@ subroutine vec_sld_test_u2i1(arg1, arg2)
   vector(unsigned(2)) :: arg1, arg2, r
   r = vec_sld(arg1, arg2, 3_1)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<8xi16> to vector<16xi8>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<8xi16> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[barg2]], %[[barg1]] [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<8xi16>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<8xi16>) -> !fir.vector<8:ui16>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
-! CHECK: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28>
-! CHECK: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
-! CHECK: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! BE-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<8xi16> to vector<16xi8>
-! BE-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<8xi16> to vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<8xi16>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<8xi16>) -> !fir.vector<8:ui16>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-
-! BE-IR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! BE-IR: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
-! BE-IR: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
-! BE-IR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
-! BE-IR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
+! LLVMIR: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28>
+! LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
+! LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
+! BE-LLVMIR: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
+! BE-LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
+! BE-LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
 end subroutine vec_sld_test_u2i1
 
 ! CHECK-LABEL: vec_sld_test_u2i2
@@ -667,43 +338,21 @@ subroutine vec_sld_test_u2i2(arg1, arg2)
   vector(unsigned(2)) :: arg1, arg2, r
   r = vec_sld(arg1, arg2, 3_2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<8xi16> to vector<16xi8>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<8xi16> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[barg2]], %[[barg1]] [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<8xi16>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<8xi16>) -> !fir.vector<8:ui16>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
-! CHECK: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28>
-! CHECK: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
-! CHECK: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! BE-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<8xi16> to vector<16xi8>
-! BE-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<8xi16> to vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<8xi16>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<8xi16>) -> !fir.vector<8:ui16>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-
-! BE-IR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! BE-IR: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
-! BE-IR: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
-! BE-IR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
-! BE-IR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
+! LLVMIR: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28>
+! LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
+! LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
+! BE-LLVMIR: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
+! BE-LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
+! BE-LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
 end subroutine vec_sld_test_u2i2
 
 ! CHECK-LABEL: vec_sld_test_u2i4
@@ -711,43 +360,21 @@ subroutine vec_sld_test_u2i4(arg1, arg2)
   vector(unsigned(2)) :: arg1, arg2, r
   r = vec_sld(arg1, arg2, 3_4)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<8xi16> to vector<16xi8>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<8xi16> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[barg2]], %[[barg1]] [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<8xi16>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<8xi16>) -> !fir.vector<8:ui16>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
-! CHECK: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28>
-! CHECK: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
-! CHECK: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! BE-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<8xi16> to vector<16xi8>
-! BE-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<8xi16> to vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<8xi16>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<8xi16>) -> !fir.vector<8:ui16>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-
-! BE-IR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! BE-IR: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
-! BE-IR: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
-! BE-IR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
-! BE-IR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
+! LLVMIR: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28>
+! LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
+! LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
+! BE-LLVMIR: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
+! BE-LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
+! BE-LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
 end subroutine vec_sld_test_u2i4
 
 ! CHECK-LABEL: vec_sld_test_u2i8
@@ -755,43 +382,21 @@ subroutine vec_sld_test_u2i8(arg1, arg2)
   vector(unsigned(2)) :: arg1, arg2, r
   r = vec_sld(arg1, arg2, 3_8)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<8xi16> to vector<16xi8>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<8xi16> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[barg2]], %[[barg1]] [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<8xi16>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<8xi16>) -> !fir.vector<8:ui16>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
-! CHECK: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28>
-! CHECK: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
-! CHECK: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! BE-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<8xi16> to vector<16xi8>
-! BE-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<8xi16> to vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<8xi16>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<8xi16>) -> !fir.vector<8:ui16>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-
-! BE-IR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! BE-IR: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
-! BE-IR: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
-! BE-IR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
-! BE-IR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
+! LLVMIR: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28>
+! LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
+! LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
+! BE-LLVMIR: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
+! BE-LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
+! BE-LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
 end subroutine vec_sld_test_u2i8
 
 ! CHECK-LABEL: vec_sld_test_u4i1
@@ -799,43 +404,21 @@ subroutine vec_sld_test_u4i1(arg1, arg2)
   vector(unsigned(4)) :: arg1, arg2, r
   r = vec_sld(arg1, arg2, 3_1)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[barg2]], %[[barg1]] [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<4xi32>) -> !fir.vector<4:ui32>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
-! CHECK: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28>
-! CHECK: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
-! CHECK: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! BE-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<4xi32> to vector<16xi8>
-! BE-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<4xi32> to vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<4xi32>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<4xi32>) -> !fir.vector<4:ui32>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-
-! BE-IR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! BE-IR: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
-! BE-IR: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
-! BE-IR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
-! BE-IR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
+! LLVMIR: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28>
+! LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
+! LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
+! BE-LLVMIR: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
+! BE-LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
+! BE-LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
 end subroutine vec_sld_test_u4i1
 
 ! CHECK-LABEL: vec_sld_test_u4i2
@@ -843,43 +426,21 @@ subroutine vec_sld_test_u4i2(arg1, arg2)
   vector(unsigned(4)) :: arg1, arg2, r
   r = vec_sld(arg1, arg2, 3_2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[barg2]], %[[barg1]] [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<4xi32>) -> !fir.vector<4:ui32>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
-! CHECK: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28>
-! CHECK: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
-! CHECK: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! BE-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<4xi32> to vector<16xi8>
-! BE-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<4xi32> to vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<4xi32>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<4xi32>) -> !fir.vector<4:ui32>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-
-! BE-IR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! BE-IR: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
-! BE-IR: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
-! BE-IR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
-! BE-IR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
+! LLVMIR: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28>
+! LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
+! LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
+! BE-LLVMIR: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
+! BE-LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
+! BE-LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
 end subroutine vec_sld_test_u4i2
 
 ! CHECK-LABEL: vec_sld_test_u4i4
@@ -887,43 +448,21 @@ subroutine vec_sld_test_u4i4(arg1, arg2)
   vector(unsigned(4)) :: arg1, arg2, r
   r = vec_sld(arg1, arg2, 3_4)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[barg2]], %[[barg1]] [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<4xi32>) -> !fir.vector<4:ui32>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
-! CHECK: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28>
-! CHECK: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
-! CHECK: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! BE-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<4xi32> to vector<16xi8>
-! BE-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<4xi32> to vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<4xi32>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<4xi32>) -> !fir.vector<4:ui32>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-
-! BE-IR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! BE-IR: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
-! BE-IR: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
-! BE-IR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
-! BE-IR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
+! LLVMIR: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28>
+! LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
+! LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
+! BE-LLVMIR: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
+! BE-LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
+! BE-LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
 end subroutine vec_sld_test_u4i4
 
 ! CHECK-LABEL: vec_sld_test_u4i8
@@ -931,43 +470,21 @@ subroutine vec_sld_test_u4i8(arg1, arg2)
   vector(unsigned(4)) :: arg1, arg2, r
   r = vec_sld(arg1, arg2, 3_8)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[barg2]], %[[barg1]] [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<4xi32>) -> !fir.vector<4:ui32>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
-! CHECK: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28>
-! CHECK: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
-! CHECK: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! BE-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<4xi32> to vector<16xi8>
-! BE-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<4xi32> to vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<4xi32>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<4xi32>) -> !fir.vector<4:ui32>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-
-! BE-IR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! BE-IR: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
-! BE-IR: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
-! BE-IR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
-! BE-IR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
+! LLVMIR: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28>
+! LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
+! LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
+! BE-LLVMIR: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
+! BE-LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
+! BE-LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
 end subroutine vec_sld_test_u4i8
 
 ! CHECK-LABEL: vec_sld_test_r4i1
@@ -975,43 +492,21 @@ subroutine vec_sld_test_r4i1(arg1, arg2)
   vector(real(4)) :: arg1, arg2, r
   r = vec_sld(arg1, arg2, 3_1)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<4xf32> to vector<16xi8>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<4xf32> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[barg2]], %[[barg1]] [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<4xf32>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-
-! CHECK: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <4 x float> %[[arg1]] to <16 x i8>
-! CHECK: %[[barg2:.*]] = bitcast <4 x float> %[[arg2]] to <16 x i8>
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28>
-! CHECK: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x float>
-! CHECK: store <4 x float> %[[br]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! BE-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<4xf32> to vector<16xi8>
-! BE-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<4xf32> to vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<4xf32>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-
-! BE-IR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! BE-IR: %[[barg1:.*]] = bitcast <4 x float> %[[arg1]] to <16 x i8>
-! BE-IR: %[[barg2:.*]] = bitcast <4 x float> %[[arg2]] to <16 x i8>
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
-! BE-IR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x float>
-! BE-IR: store <4 x float> %[[br]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <4 x float> %[[arg1]] to <16 x i8>
+! LLVMIR: %[[barg2:.*]] = bitcast <4 x float> %[[arg2]] to <16 x i8>
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28>
+! LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x float>
+! LLVMIR: store <4 x float> %[[br]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[barg1:.*]] = bitcast <4 x float> %[[arg1]] to <16 x i8>
+! BE-LLVMIR: %[[barg2:.*]] = bitcast <4 x float> %[[arg2]] to <16 x i8>
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
+! BE-LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x float>
+! BE-LLVMIR: store <4 x float> %[[br]], ptr %{{.*}}, align 16
 end subroutine vec_sld_test_r4i1
 
 ! CHECK-LABEL: vec_sld_test_r4i2
@@ -1019,43 +514,21 @@ subroutine vec_sld_test_r4i2(arg1, arg2)
   vector(real(4)) :: arg1, arg2, r
   r = vec_sld(arg1, arg2, 3_2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<4xf32> to vector<16xi8>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<4xf32> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[barg2]], %[[barg1]] [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<4xf32>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-
-! CHECK: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <4 x float> %[[arg1]] to <16 x i8>
-! CHECK: %[[barg2:.*]] = bitcast <4 x float> %[[arg2]] to <16 x i8>
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28>
-! CHECK: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x float>
-! CHECK: store <4 x float> %[[br]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! BE-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<4xf32> to vector<16xi8>
-! BE-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<4xf32> to vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<4xf32>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-
-! BE-IR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! BE-IR: %[[barg1:.*]] = bitcast <4 x float> %[[arg1]] to <16 x i8>
-! BE-IR: %[[barg2:.*]] = bitcast <4 x float> %[[arg2]] to <16 x i8>
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
-! BE-IR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x float>
-! BE-IR: store <4 x float> %[[br]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <4 x float> %[[arg1]] to <16 x i8>
+! LLVMIR: %[[barg2:.*]] = bitcast <4 x float> %[[arg2]] to <16 x i8>
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28>
+! LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x float>
+! LLVMIR: store <4 x float> %[[br]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[barg1:.*]] = bitcast <4 x float> %[[arg1]] to <16 x i8>
+! BE-LLVMIR: %[[barg2:.*]] = bitcast <4 x float> %[[arg2]] to <16 x i8>
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
+! BE-LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x float>
+! BE-LLVMIR: store <4 x float> %[[br]], ptr %{{.*}}, align 16
 end subroutine vec_sld_test_r4i2
 
 ! CHECK-LABEL: vec_sld_test_r4i4
@@ -1063,43 +536,21 @@ subroutine vec_sld_test_r4i4(arg1, arg2)
   vector(real(4)) :: arg1, arg2, r
   r = vec_sld(arg1, arg2, 3_4)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<4xf32> to vector<16xi8>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<4xf32> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[barg2]], %[[barg1]] [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<4xf32>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-
-! CHECK: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <4 x float> %[[arg1]] to <16 x i8>
-! CHECK: %[[barg2:.*]] = bitcast <4 x float> %[[arg2]] to <16 x i8>
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28>
-! CHECK: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x float>
-! CHECK: store <4 x float> %[[br]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! BE-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<4xf32> to vector<16xi8>
-! BE-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<4xf32> to vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<4xf32>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-
-! BE-IR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! BE-IR: %[[barg1:.*]] = bitcast <4 x float> %[[arg1]] to <16 x i8>
-! BE-IR: %[[barg2:.*]] = bitcast <4 x float> %[[arg2]] to <16 x i8>
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
-! BE-IR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x float>
-! BE-IR: store <4 x float> %[[br]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <4 x float> %[[arg1]] to <16 x i8>
+! LLVMIR: %[[barg2:.*]] = bitcast <4 x float> %[[arg2]] to <16 x i8>
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28>
+! LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x float>
+! LLVMIR: store <4 x float> %[[br]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[barg1:.*]] = bitcast <4 x float> %[[arg1]] to <16 x i8>
+! BE-LLVMIR: %[[barg2:.*]] = bitcast <4 x float> %[[arg2]] to <16 x i8>
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
+! BE-LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x float>
+! BE-LLVMIR: store <4 x float> %[[br]], ptr %{{.*}}, align 16
 end subroutine vec_sld_test_r4i4
 
 ! CHECK-LABEL: vec_sld_test_r4i8
@@ -1107,43 +558,21 @@ subroutine vec_sld_test_r4i8(arg1, arg2)
   vector(real(4)) :: arg1, arg2, r
   r = vec_sld(arg1, arg2, 1_8)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<4xf32> to vector<16xi8>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<4xf32> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[barg2]], %[[barg1]] [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<4xf32>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-
-! CHECK: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <4 x float> %[[arg1]] to <16 x i8>
-! CHECK: %[[barg2:.*]] = bitcast <4 x float> %[[arg2]] to <16 x i8>
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
-! CHECK: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x float>
-! CHECK: store <4 x float> %[[br]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! BE-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<4xf32> to vector<16xi8>
-! BE-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<4xf32> to vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<4xf32>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-
-! BE-IR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! BE-IR: %[[barg1:.*]] = bitcast <4 x float> %[[arg1]] to <16 x i8>
-! BE-IR: %[[barg2:.*]] = bitcast <4 x float> %[[arg2]] to <16 x i8>
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
-! BE-IR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x float>
-! BE-IR: store <4 x float> %[[br]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <4 x float> %[[arg1]] to <16 x i8>
+! LLVMIR: %[[barg2:.*]] = bitcast <4 x float> %[[arg2]] to <16 x i8>
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
+! LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x float>
+! LLVMIR: store <4 x float> %[[br]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[barg1:.*]] = bitcast <4 x float> %[[arg1]] to <16 x i8>
+! BE-LLVMIR: %[[barg2:.*]] = bitcast <4 x float> %[[arg2]] to <16 x i8>
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
+! BE-LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x float>
+! BE-LLVMIR: store <4 x float> %[[br]], ptr %{{.*}}, align 16
 end subroutine vec_sld_test_r4i8
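
The shuffle masks in the vec_sld checks above all follow one pattern: for a byte shift c (taken modulo 16, so c is assumed to be in 0..15 below), the big-endian lowering is a shufflevector of (arg1, arg2) with indices c .. c+15, while the little-endian lowering swaps the operands and uses indices (16-c) .. (31-c). As a minimal sketch — not part of this patch, and the helper name sldMask is made up for illustration — the expected masks can be reproduced like this:

  #include <array>
  #include <cstdio>

  // Compute the 16 shuffle indices the checks expect for a byte shift c
  // (assumes 0 <= c < 16, i.e. the shift is already reduced modulo 16).
  std::array<int, 16> sldMask(unsigned c, bool bigEndian) {
    std::array<int, 16> mask;
    for (int i = 0; i < 16; ++i)
      // BE: concat(arg1, arg2)[c + i]; LE: concat(arg2, arg1)[(16 - c) + i]
      mask[i] = bigEndian ? c + i : (16 - c) + i;
    return mask;
  }

  int main() {
    for (int idx : sldMask(3, /*bigEndian=*/false))
      std::printf("%d ", idx); // prints 13 14 ... 28, as in the LLVMIR lines
    std::printf("\n");
  }
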
 
 !----------------------
@@ -1154,31 +583,15 @@ subroutine vec_sldw_test_i1i1(arg1, arg2)
   vector(integer(1)) :: arg1, arg2, r
   r = vec_sldw(arg1, arg2, 3_1)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[carg2]], %[[carg1]] [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<16xi8>) -> !fir.vector<16:i8>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[arg2]], <16 x i8> %[[arg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
-! CHECK: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<16xi8>) -> !fir.vector<16:i8>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-
-! BE-IR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[arg1]], <16 x i8> %[[arg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
-! BE-IR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[arg2]], <16 x i8> %[[arg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+! LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[arg1]], <16 x i8> %[[arg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
+! BE-LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_sldw_test_i1i1
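! Reading the vec_sldw checks (a gloss inferred from the masks shown): the
! intrinsic shifts the concatenated operand pair left by <shift> words, here
! 3 words = 12 bytes. On big-endian targets the result is bytes 12..27 of
! arg1:arg2 (BE-LLVMIR mask <12 .. 27>); little-endian targets compute the same
! value by shuffling (arg2, arg1) with a mask starting at 16 - 12 = 4 (LLVMIR
! mask <4 .. 19>). The same pattern repeats for every element type below, with
! bitcasts to <16 x i8> added around the shuffle for non-byte vectors.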
 
 ! CHECK-LABEL: vec_sldw_test_i1i2
@@ -1186,31 +599,15 @@ subroutine vec_sldw_test_i1i2(arg1, arg2)
   vector(integer(1)) :: arg1, arg2, r
   r = vec_sldw(arg1, arg2, 3_2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[carg2]], %[[carg1]] [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<16xi8>) -> !fir.vector<16:i8>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[arg2]], <16 x i8> %[[arg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
-! CHECK: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<16xi8>) -> !fir.vector<16:i8>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-
-! BE-IR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[arg1]], <16 x i8> %[[arg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
-! BE-IR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[arg2]], <16 x i8> %[[arg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+! LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[arg1]], <16 x i8> %[[arg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
+! BE-LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_sldw_test_i1i2
 
 ! CHECK-LABEL: vec_sldw_test_i1i4
@@ -1218,31 +615,15 @@ subroutine vec_sldw_test_i1i4(arg1, arg2)
   vector(integer(1)) :: arg1, arg2, r
   r = vec_sldw(arg1, arg2, 3_4)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[carg2]], %[[carg1]] [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<16xi8>) -> !fir.vector<16:i8>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[arg2]], <16 x i8> %[[arg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
-! CHECK: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<16xi8>) -> !fir.vector<16:i8>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-
-! BE-IR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[arg1]], <16 x i8> %[[arg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
-! BE-IR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[arg2]], <16 x i8> %[[arg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+! LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[arg1]], <16 x i8> %[[arg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
+! BE-LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_sldw_test_i1i4
 
 ! CHECK-LABEL: vec_sldw_test_i1i8
@@ -1250,31 +631,15 @@ subroutine vec_sldw_test_i1i8(arg1, arg2)
   vector(integer(1)) :: arg1, arg2, r
   r = vec_sldw(arg1, arg2, 3_8)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[carg2]], %[[carg1]] [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<16xi8>) -> !fir.vector<16:i8>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[arg2]], <16 x i8> %[[arg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
-! CHECK: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<16xi8>) -> !fir.vector<16:i8>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-
-! BE-IR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[arg1]], <16 x i8> %[[arg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
-! BE-IR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[arg2]], <16 x i8> %[[arg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+! LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[arg1]], <16 x i8> %[[arg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
+! BE-LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_sldw_test_i1i8
 
 ! CHECK-LABEL: vec_sldw_test_i2i1
@@ -1282,43 +647,21 @@ subroutine vec_sldw_test_i2i1(arg1, arg2)
   vector(integer(2)) :: arg1, arg2, r
   r = vec_sldw(arg1, arg2, 3_1)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<8xi16> to vector<16xi8>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<8xi16> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[barg2]], %[[barg1]] [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<8xi16>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<8xi16>) -> !fir.vector<8:i16>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
-! CHECK: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
-! CHECK: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
-! CHECK: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! BE-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<8xi16> to vector<16xi8>
-! BE-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<8xi16> to vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<8xi16>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<8xi16>) -> !fir.vector<8:i16>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-
-! BE-IR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! BE-IR: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
-! BE-IR: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
-! BE-IR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
-! BE-IR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
+! LLVMIR: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+! LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
+! LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
+! BE-LLVMIR: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
+! BE-LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
+! BE-LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
 end subroutine vec_sldw_test_i2i1
 
 ! CHECK-LABEL: vec_sldw_test_i2i2
@@ -1326,43 +669,21 @@ subroutine vec_sldw_test_i2i2(arg1, arg2)
   vector(integer(2)) :: arg1, arg2, r
   r = vec_sldw(arg1, arg2, 3_2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<8xi16> to vector<16xi8>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<8xi16> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[barg2]], %[[barg1]] [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<8xi16>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<8xi16>) -> !fir.vector<8:i16>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
-! CHECK: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
-! CHECK: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
-! CHECK: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! BE-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<8xi16> to vector<16xi8>
-! BE-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<8xi16> to vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<8xi16>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<8xi16>) -> !fir.vector<8:i16>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-
-! BE-IR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! BE-IR: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
-! BE-IR: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
-! BE-IR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
-! BE-IR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
+! LLVMIR: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+! LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
+! LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
+! BE-LLVMIR: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
+! BE-LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
+! BE-LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
 end subroutine vec_sldw_test_i2i2
 
 ! CHECK-LABEL: vec_sldw_test_i2i4
@@ -1370,43 +691,21 @@ subroutine vec_sldw_test_i2i4(arg1, arg2)
   vector(integer(2)) :: arg1, arg2, r
   r = vec_sldw(arg1, arg2, 3_4)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<8xi16> to vector<16xi8>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<8xi16> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[barg2]], %[[barg1]] [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<8xi16>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<8xi16>) -> !fir.vector<8:i16>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
-! CHECK: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
-! CHECK: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
-! CHECK: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! BE-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<8xi16> to vector<16xi8>
-! BE-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<8xi16> to vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<8xi16>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<8xi16>) -> !fir.vector<8:i16>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-
-! BE-IR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! BE-IR: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
-! BE-IR: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
-! BE-IR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
-! BE-IR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
+! LLVMIR: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+! LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
+! LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
+! BE-LLVMIR: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
+! BE-LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
+! BE-LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
 end subroutine vec_sldw_test_i2i4
 
 ! CHECK-LABEL: vec_sldw_test_i2i8
@@ -1414,43 +713,21 @@ subroutine vec_sldw_test_i2i8(arg1, arg2)
   vector(integer(2)) :: arg1, arg2, r
   r = vec_sldw(arg1, arg2, 3_8)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<8xi16> to vector<16xi8>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<8xi16> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[barg2]], %[[barg1]] [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<8xi16>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<8xi16>) -> !fir.vector<8:i16>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
-! CHECK: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
-! CHECK: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
-! CHECK: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! BE-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<8xi16> to vector<16xi8>
-! BE-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<8xi16> to vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<8xi16>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<8xi16>) -> !fir.vector<8:i16>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-
-! BE-IR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! BE-IR: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
-! BE-IR: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
-! BE-IR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
-! BE-IR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
+! LLVMIR: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+! LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
+! LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
+! BE-LLVMIR: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
+! BE-LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
+! BE-LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
 end subroutine vec_sldw_test_i2i8
 
 ! CHECK-LABEL: vec_sldw_test_i4i1
@@ -1458,43 +735,21 @@ subroutine vec_sldw_test_i4i1(arg1, arg2)
   vector(integer(4)) :: arg1, arg2, r
   r = vec_sldw(arg1, arg2, 3_1)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[barg2]], %[[barg1]] [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
-! CHECK: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
-! CHECK: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
-! CHECK: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! BE-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<4xi32> to vector<16xi8>
-! BE-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<4xi32> to vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<4xi32>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-
-! BE-IR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! BE-IR: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
-! BE-IR: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
-! BE-IR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
-! BE-IR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
+! LLVMIR: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+! LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
+! LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
+! BE-LLVMIR: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
+! BE-LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
+! BE-LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
 end subroutine vec_sldw_test_i4i1
 
 ! CHECK-LABEL: vec_sldw_test_i4i2
@@ -1502,43 +757,21 @@ subroutine vec_sldw_test_i4i2(arg1, arg2)
   vector(integer(4)) :: arg1, arg2, r
   r = vec_sldw(arg1, arg2, 3_2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[barg2]], %[[barg1]] [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
-! CHECK: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
-! CHECK: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
-! CHECK: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! BE-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<4xi32> to vector<16xi8>
-! BE-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<4xi32> to vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<4xi32>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-
-! BE-IR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! BE-IR: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
-! BE-IR: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
-! BE-IR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
-! BE-IR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
+! LLVMIR: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+! LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
+! LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
+! BE-LLVMIR: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
+! BE-LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
+! BE-LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
 end subroutine vec_sldw_test_i4i2
 
 ! CHECK-LABEL: vec_sldw_test_i4i4
@@ -1546,43 +779,21 @@ subroutine vec_sldw_test_i4i4(arg1, arg2)
   vector(integer(4)) :: arg1, arg2, r
   r = vec_sldw(arg1, arg2, 3_4)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[barg2]], %[[barg1]] [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
-! CHECK: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
-! CHECK: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
-! CHECK: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! BE-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<4xi32> to vector<16xi8>
-! BE-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<4xi32> to vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<4xi32>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-
-! BE-IR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! BE-IR: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
-! BE-IR: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
-! BE-IR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
-! BE-IR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
+! LLVMIR: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+! LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
+! LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
+! BE-LLVMIR: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
+! BE-LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
+! BE-LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
 end subroutine vec_sldw_test_i4i4
 
 ! CHECK-LABEL: vec_sldw_test_i4i8
@@ -1590,43 +801,21 @@ subroutine vec_sldw_test_i4i8(arg1, arg2)
   vector(integer(4)) :: arg1, arg2, r
   r = vec_sldw(arg1, arg2, 3_8)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[barg2]], %[[barg1]] [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
-! CHECK: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
-! CHECK: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
-! CHECK: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! BE-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<4xi32> to vector<16xi8>
-! BE-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<4xi32> to vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<4xi32>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-
-! BE-IR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! BE-IR: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
-! BE-IR: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
-! BE-IR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
-! BE-IR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
+! LLVMIR: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+! LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
+! LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
+! BE-LLVMIR: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
+! BE-LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
+! BE-LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
 end subroutine vec_sldw_test_i4i8
 
 ! CHECK-LABEL: vec_sldw_test_i8i1
@@ -1634,43 +823,21 @@ subroutine vec_sldw_test_i8i1(arg1, arg2)
   vector(integer(8)) :: arg1, arg2, r
   r = vec_sldw(arg1, arg2, 3_1)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<2xi64> to vector<16xi8>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<2xi64> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[barg2]], %[[barg1]] [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<2xi64>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<2xi64>) -> !fir.vector<2:i64>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-
-! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <2 x i64> %[[arg1]] to <16 x i8>
-! CHECK: %[[barg2:.*]] = bitcast <2 x i64> %[[arg2]] to <16 x i8>
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
-! CHECK: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <2 x i64>
-! CHECK: store <2 x i64> %[[br]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! BE-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<2xi64> to vector<16xi8>
-! BE-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<2xi64> to vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<2xi64>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<2xi64>) -> !fir.vector<2:i64>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-
-! BE-IR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! BE-IR: %[[barg1:.*]] = bitcast <2 x i64> %[[arg1]] to <16 x i8>
-! BE-IR: %[[barg2:.*]] = bitcast <2 x i64> %[[arg2]] to <16 x i8>
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
-! BE-IR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <2 x i64>
-! BE-IR: store <2 x i64> %[[br]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <2 x i64> %[[arg1]] to <16 x i8>
+! LLVMIR: %[[barg2:.*]] = bitcast <2 x i64> %[[arg2]] to <16 x i8>
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+! LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <2 x i64>
+! LLVMIR: store <2 x i64> %[[br]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[barg1:.*]] = bitcast <2 x i64> %[[arg1]] to <16 x i8>
+! BE-LLVMIR: %[[barg2:.*]] = bitcast <2 x i64> %[[arg2]] to <16 x i8>
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
+! BE-LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <2 x i64>
+! BE-LLVMIR: store <2 x i64> %[[br]], ptr %{{.*}}, align 16
 end subroutine vec_sldw_test_i8i1
 
 ! CHECK-LABEL: vec_sldw_test_i8i2
@@ -1678,43 +845,21 @@ subroutine vec_sldw_test_i8i2(arg1, arg2)
   vector(integer(8)) :: arg1, arg2, r
   r = vec_sldw(arg1, arg2, 3_2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<2xi64> to vector<16xi8>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<2xi64> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[barg2]], %[[barg1]] [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<2xi64>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<2xi64>) -> !fir.vector<2:i64>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-
-! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <2 x i64> %[[arg1]] to <16 x i8>
-! CHECK: %[[barg2:.*]] = bitcast <2 x i64> %[[arg2]] to <16 x i8>
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
-! CHECK: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <2 x i64>
-! CHECK: store <2 x i64> %[[br]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! BE-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<2xi64> to vector<16xi8>
-! BE-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<2xi64> to vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<2xi64>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<2xi64>) -> !fir.vector<2:i64>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-
-! BE-IR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! BE-IR: %[[barg1:.*]] = bitcast <2 x i64> %[[arg1]] to <16 x i8>
-! BE-IR: %[[barg2:.*]] = bitcast <2 x i64> %[[arg2]] to <16 x i8>
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
-! BE-IR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <2 x i64>
-! BE-IR: store <2 x i64> %[[br]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <2 x i64> %[[arg1]] to <16 x i8>
+! LLVMIR: %[[barg2:.*]] = bitcast <2 x i64> %[[arg2]] to <16 x i8>
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+! LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <2 x i64>
+! LLVMIR: store <2 x i64> %[[br]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[barg1:.*]] = bitcast <2 x i64> %[[arg1]] to <16 x i8>
+! BE-LLVMIR: %[[barg2:.*]] = bitcast <2 x i64> %[[arg2]] to <16 x i8>
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
+! BE-LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <2 x i64>
+! BE-LLVMIR: store <2 x i64> %[[br]], ptr %{{.*}}, align 16
 end subroutine vec_sldw_test_i8i2
 
 ! CHECK-LABEL: vec_sldw_test_i8i4
@@ -1722,43 +867,21 @@ subroutine vec_sldw_test_i8i4(arg1, arg2)
   vector(integer(8)) :: arg1, arg2, r
   r = vec_sldw(arg1, arg2, 3_4)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<2xi64> to vector<16xi8>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<2xi64> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[barg2]], %[[barg1]] [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<2xi64>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<2xi64>) -> !fir.vector<2:i64>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-
-! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <2 x i64> %[[arg1]] to <16 x i8>
-! CHECK: %[[barg2:.*]] = bitcast <2 x i64> %[[arg2]] to <16 x i8>
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
-! CHECK: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <2 x i64>
-! CHECK: store <2 x i64> %[[br]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! BE-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<2xi64> to vector<16xi8>
-! BE-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<2xi64> to vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<2xi64>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<2xi64>) -> !fir.vector<2:i64>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-
-! BE-IR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! BE-IR: %[[barg1:.*]] = bitcast <2 x i64> %[[arg1]] to <16 x i8>
-! BE-IR: %[[barg2:.*]] = bitcast <2 x i64> %[[arg2]] to <16 x i8>
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
-! BE-IR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <2 x i64>
-! BE-IR: store <2 x i64> %[[br]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <2 x i64> %[[arg1]] to <16 x i8>
+! LLVMIR: %[[barg2:.*]] = bitcast <2 x i64> %[[arg2]] to <16 x i8>
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+! LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <2 x i64>
+! LLVMIR: store <2 x i64> %[[br]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[barg1:.*]] = bitcast <2 x i64> %[[arg1]] to <16 x i8>
+! BE-LLVMIR: %[[barg2:.*]] = bitcast <2 x i64> %[[arg2]] to <16 x i8>
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
+! BE-LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <2 x i64>
+! BE-LLVMIR: store <2 x i64> %[[br]], ptr %{{.*}}, align 16
 end subroutine vec_sldw_test_i8i4
 
 ! CHECK-LABEL: vec_sldw_test_i8i8
@@ -1766,43 +889,21 @@ subroutine vec_sldw_test_i8i8(arg1, arg2)
   vector(integer(8)) :: arg1, arg2, r
   r = vec_sldw(arg1, arg2, 3_8)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<2xi64> to vector<16xi8>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<2xi64> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[barg2]], %[[barg1]] [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<2xi64>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<2xi64>) -> !fir.vector<2:i64>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-
-! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <2 x i64> %[[arg1]] to <16 x i8>
-! CHECK: %[[barg2:.*]] = bitcast <2 x i64> %[[arg2]] to <16 x i8>
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
-! CHECK: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <2 x i64>
-! CHECK: store <2 x i64> %[[br]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! BE-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<2xi64> to vector<16xi8>
-! BE-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<2xi64> to vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<2xi64>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<2xi64>) -> !fir.vector<2:i64>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-
-! BE-IR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! BE-IR: %[[barg1:.*]] = bitcast <2 x i64> %[[arg1]] to <16 x i8>
-! BE-IR: %[[barg2:.*]] = bitcast <2 x i64> %[[arg2]] to <16 x i8>
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
-! BE-IR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <2 x i64>
-! BE-IR: store <2 x i64> %[[br]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <2 x i64> %[[arg1]] to <16 x i8>
+! LLVMIR: %[[barg2:.*]] = bitcast <2 x i64> %[[arg2]] to <16 x i8>
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+! LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <2 x i64>
+! LLVMIR: store <2 x i64> %[[br]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[barg1:.*]] = bitcast <2 x i64> %[[arg1]] to <16 x i8>
+! BE-LLVMIR: %[[barg2:.*]] = bitcast <2 x i64> %[[arg2]] to <16 x i8>
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
+! BE-LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <2 x i64>
+! BE-LLVMIR: store <2 x i64> %[[br]], ptr %{{.*}}, align 16
 
 end subroutine vec_sldw_test_i8i8
 
@@ -1811,31 +912,15 @@ subroutine vec_sldw_test_u1i1(arg1, arg2)
   vector(unsigned(1)) :: arg1, arg2, r
   r = vec_sldw(arg1, arg2, 3_1)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[carg2]], %[[carg1]] [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[arg2]], <16 x i8> %[[arg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
-! CHECK: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-
-! BE-IR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[arg1]], <16 x i8> %[[arg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
-! BE-IR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[arg2]], <16 x i8> %[[arg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+! LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[arg1]], <16 x i8> %[[arg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
+! BE-LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_sldw_test_u1i1
 
 ! CHECK-LABEL: vec_sldw_test_u1i2
@@ -1843,31 +928,15 @@ subroutine vec_sldw_test_u1i2(arg1, arg2)
   vector(unsigned(1)) :: arg1, arg2, r
   r = vec_sldw(arg1, arg2, 3_2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[carg2]], %[[carg1]] [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[arg2]], <16 x i8> %[[arg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
-! CHECK: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-
-! BE-IR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[arg1]], <16 x i8> %[[arg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
-! BE-IR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[arg2]], <16 x i8> %[[arg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+! LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[arg1]], <16 x i8> %[[arg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
+! BE-LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_sldw_test_u1i2
 
 ! CHECK-LABEL: vec_sldw_test_u1i4
@@ -1875,31 +944,15 @@ subroutine vec_sldw_test_u1i4(arg1, arg2)
   vector(unsigned(1)) :: arg1, arg2, r
   r = vec_sldw(arg1, arg2, 3_4)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[carg2]], %[[carg1]] [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[arg2]], <16 x i8> %[[arg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
-! CHECK: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-
-! BE-IR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[arg1]], <16 x i8> %[[arg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
-! BE-IR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[arg2]], <16 x i8> %[[arg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+! LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[arg1]], <16 x i8> %[[arg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
+! BE-LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_sldw_test_u1i4
 
 ! CHECK-LABEL: vec_sldw_test_u1i8
@@ -1907,31 +960,15 @@ subroutine vec_sldw_test_u1i8(arg1, arg2)
   vector(unsigned(1)) :: arg1, arg2, r
   r = vec_sldw(arg1, arg2, 3_8)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[carg2]], %[[carg1]] [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[arg2]], <16 x i8> %[[arg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
-! CHECK: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[carg1]], %[[carg2]] [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[r]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-
-! BE-IR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[arg1]], <16 x i8> %[[arg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
-! BE-IR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[arg2]], <16 x i8> %[[arg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+! LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[arg1]], <16 x i8> %[[arg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
+! BE-LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16
 end subroutine vec_sldw_test_u1i8
 
 ! CHECK-LABEL: vec_sldw_test_u2i1
@@ -1939,43 +976,21 @@ subroutine vec_sldw_test_u2i1(arg1, arg2)
   vector(unsigned(2)) :: arg1, arg2, r
   r = vec_sldw(arg1, arg2, 3_1)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<8xi16> to vector<16xi8>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<8xi16> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[barg2]], %[[barg1]] [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<8xi16>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<8xi16>) -> !fir.vector<8:ui16>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
-! CHECK: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
-! CHECK: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
-! CHECK: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! BE-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<8xi16> to vector<16xi8>
-! BE-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<8xi16> to vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<8xi16>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<8xi16>) -> !fir.vector<8:ui16>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-
-! BE-IR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! BE-IR: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
-! BE-IR: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
-! BE-IR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
-! BE-IR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
+! LLVMIR: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+! LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
+! LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
+! BE-LLVMIR: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
+! BE-LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
+! BE-LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
 end subroutine vec_sldw_test_u2i1
 
 ! CHECK-LABEL: vec_sldw_test_u2i2
@@ -1983,43 +998,21 @@ subroutine vec_sldw_test_u2i2(arg1, arg2)
   vector(unsigned(2)) :: arg1, arg2, r
   r = vec_sldw(arg1, arg2, 3_2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<8xi16> to vector<16xi8>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<8xi16> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[barg2]], %[[barg1]] [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<8xi16>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<8xi16>) -> !fir.vector<8:ui16>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
-! CHECK: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
-! CHECK: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
-! CHECK: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! BE-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<8xi16> to vector<16xi8>
-! BE-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<8xi16> to vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<8xi16>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<8xi16>) -> !fir.vector<8:ui16>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-
-! BE-IR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! BE-IR: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
-! BE-IR: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
-! BE-IR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
-! BE-IR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
+! LLVMIR: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+! LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
+! LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
+! BE-LLVMIR: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
+! BE-LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
+! BE-LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
 end subroutine vec_sldw_test_u2i2
 
 ! CHECK-LABEL: vec_sldw_test_u2i4
@@ -2027,43 +1020,21 @@ subroutine vec_sldw_test_u2i4(arg1, arg2)
   vector(unsigned(2)) :: arg1, arg2, r
   r = vec_sldw(arg1, arg2, 3_4)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<8xi16> to vector<16xi8>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<8xi16> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[barg2]], %[[barg1]] [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<8xi16>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<8xi16>) -> !fir.vector<8:ui16>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
-! CHECK: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
-! CHECK: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
-! CHECK: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! BE-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<8xi16> to vector<16xi8>
-! BE-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<8xi16> to vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<8xi16>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<8xi16>) -> !fir.vector<8:ui16>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-
-! BE-IR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! BE-IR: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
-! BE-IR: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
-! BE-IR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
-! BE-IR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
+! LLVMIR: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+! LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
+! LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
+! BE-LLVMIR: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
+! BE-LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
+! BE-LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
 end subroutine vec_sldw_test_u2i4
 
 ! CHECK-LABEL: vec_sldw_test_u2i8
@@ -2071,43 +1042,21 @@ subroutine vec_sldw_test_u2i8(arg1, arg2)
   vector(unsigned(2)) :: arg1, arg2, r
   r = vec_sldw(arg1, arg2, 3_8)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<8xi16> to vector<16xi8>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<8xi16> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[barg2]], %[[barg1]] [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<8xi16>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<8xi16>) -> !fir.vector<8:ui16>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
-! CHECK: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
-! CHECK: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
-! CHECK: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! BE-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<8xi16> to vector<16xi8>
-! BE-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<8xi16> to vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<8xi16>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<8xi16>) -> !fir.vector<8:ui16>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-
-! BE-IR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! BE-IR: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
-! BE-IR: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
-! BE-IR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
-! BE-IR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
+! LLVMIR: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+! LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
+! LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[barg1:.*]] = bitcast <8 x i16> %[[arg1]] to <16 x i8>
+! BE-LLVMIR: %[[barg2:.*]] = bitcast <8 x i16> %[[arg2]] to <16 x i8>
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
+! BE-LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <8 x i16>
+! BE-LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16
 end subroutine vec_sldw_test_u2i8
 
 ! CHECK-LABEL: vec_sldw_test_u4i1
@@ -2115,43 +1064,21 @@ subroutine vec_sldw_test_u4i1(arg1, arg2)
   vector(unsigned(4)) :: arg1, arg2, r
   r = vec_sldw(arg1, arg2, 3_1)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[barg2]], %[[barg1]] [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<4xi32>) -> !fir.vector<4:ui32>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
-! CHECK: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
-! CHECK: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
-! CHECK: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! BE-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<4xi32> to vector<16xi8>
-! BE-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<4xi32> to vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<4xi32>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<4xi32>) -> !fir.vector<4:ui32>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-
-! BE-IR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! BE-IR: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
-! BE-IR: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
-! BE-IR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
-! BE-IR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
+! LLVMIR: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+! LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
+! LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
+! BE-LLVMIR: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
+! BE-LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
+! BE-LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
 end subroutine vec_sldw_test_u4i1
 
 ! CHECK-LABEL: vec_sldw_test_u4i2
@@ -2159,43 +1086,21 @@ subroutine vec_sldw_test_u4i2(arg1, arg2)
   vector(unsigned(4)) :: arg1, arg2, r
   r = vec_sldw(arg1, arg2, 3_2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[barg2]], %[[barg1]] [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<4xi32>) -> !fir.vector<4:ui32>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
-! CHECK: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
-! CHECK: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
-! CHECK: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! BE-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<4xi32> to vector<16xi8>
-! BE-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<4xi32> to vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<4xi32>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<4xi32>) -> !fir.vector<4:ui32>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-
-! BE-IR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! BE-IR: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
-! BE-IR: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
-! BE-IR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
-! BE-IR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
+! LLVMIR: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+! LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
+! LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
+! BE-LLVMIR: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
+! BE-LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
+! BE-LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
 end subroutine vec_sldw_test_u4i2
 
 ! CHECK-LABEL: vec_sldw_test_u4i4
@@ -2203,43 +1108,21 @@ subroutine vec_sldw_test_u4i4(arg1, arg2)
   vector(unsigned(4)) :: arg1, arg2, r
   r = vec_sldw(arg1, arg2, 3_4)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[barg2]], %[[barg1]] [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<4xi32>) -> !fir.vector<4:ui32>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
-! CHECK: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
-! CHECK: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
-! CHECK: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! BE-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<4xi32> to vector<16xi8>
-! BE-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<4xi32> to vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<4xi32>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<4xi32>) -> !fir.vector<4:ui32>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-
-! BE-IR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! BE-IR: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
-! BE-IR: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
-! BE-IR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
-! BE-IR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
+! LLVMIR: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+! LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
+! LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
+! BE-LLVMIR: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
+! BE-LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
+! BE-LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
 end subroutine vec_sldw_test_u4i4
 
 ! CHECK-LABEL: vec_sldw_test_u4i8
@@ -2247,43 +1130,21 @@ subroutine vec_sldw_test_u4i8(arg1, arg2)
   vector(unsigned(4)) :: arg1, arg2, r
   r = vec_sldw(arg1, arg2, 3_8)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[barg2]], %[[barg1]] [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<4xi32>) -> !fir.vector<4:ui32>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
-! CHECK: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
-! CHECK: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
-! CHECK: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! BE-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<4xi32> to vector<16xi8>
-! BE-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<4xi32> to vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<4xi32>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<4xi32>) -> !fir.vector<4:ui32>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-
-! BE-IR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! BE-IR: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
-! BE-IR: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
-! BE-IR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
-! BE-IR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
+! LLVMIR: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+! LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
+! LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[barg1:.*]] = bitcast <4 x i32> %[[arg1]] to <16 x i8>
+! BE-LLVMIR: %[[barg2:.*]] = bitcast <4 x i32> %[[arg2]] to <16 x i8>
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
+! BE-LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x i32>
+! BE-LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16
 end subroutine vec_sldw_test_u4i8
 
 ! CHECK-LABEL: vec_sldw_test_u8i1
@@ -2291,43 +1152,21 @@ subroutine vec_sldw_test_u8i1(arg1, arg2)
   vector(unsigned(8)) :: arg1, arg2, r
   r = vec_sldw(arg1, arg2, 3_1)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<2xi64> to vector<16xi8>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<2xi64> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[barg2]], %[[barg1]] [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<2xi64>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<2xi64>) -> !fir.vector<2:ui64>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-
-! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <2 x i64> %[[arg1]] to <16 x i8>
-! CHECK: %[[barg2:.*]] = bitcast <2 x i64> %[[arg2]] to <16 x i8>
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
-! CHECK: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <2 x i64>
-! CHECK: store <2 x i64> %[[br]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! BE-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<2xi64> to vector<16xi8>
-! BE-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<2xi64> to vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<2xi64>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<2xi64>) -> !fir.vector<2:ui64>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-
-! BE-IR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! BE-IR: %[[barg1:.*]] = bitcast <2 x i64> %[[arg1]] to <16 x i8>
-! BE-IR: %[[barg2:.*]] = bitcast <2 x i64> %[[arg2]] to <16 x i8>
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
-! BE-IR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <2 x i64>
-! BE-IR: store <2 x i64> %[[br]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <2 x i64> %[[arg1]] to <16 x i8>
+! LLVMIR: %[[barg2:.*]] = bitcast <2 x i64> %[[arg2]] to <16 x i8>
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+! LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <2 x i64>
+! LLVMIR: store <2 x i64> %[[br]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[barg1:.*]] = bitcast <2 x i64> %[[arg1]] to <16 x i8>
+! BE-LLVMIR: %[[barg2:.*]] = bitcast <2 x i64> %[[arg2]] to <16 x i8>
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
+! BE-LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <2 x i64>
+! BE-LLVMIR: store <2 x i64> %[[br]], ptr %{{.*}}, align 16
 end subroutine vec_sldw_test_u8i1
 
 ! CHECK-LABEL: vec_sldw_test_u8i2
@@ -2335,43 +1174,21 @@ subroutine vec_sldw_test_u8i2(arg1, arg2)
   vector(unsigned(8)) :: arg1, arg2, r
   r = vec_sldw(arg1, arg2, 3_2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<2xi64> to vector<16xi8>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<2xi64> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[barg2]], %[[barg1]] [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<2xi64>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<2xi64>) -> !fir.vector<2:ui64>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-
-! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <2 x i64> %[[arg1]] to <16 x i8>
-! CHECK: %[[barg2:.*]] = bitcast <2 x i64> %[[arg2]] to <16 x i8>
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
-! CHECK: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <2 x i64>
-! CHECK: store <2 x i64> %[[br]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! BE-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<2xi64> to vector<16xi8>
-! BE-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<2xi64> to vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<2xi64>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<2xi64>) -> !fir.vector<2:ui64>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-
-! BE-IR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! BE-IR: %[[barg1:.*]] = bitcast <2 x i64> %[[arg1]] to <16 x i8>
-! BE-IR: %[[barg2:.*]] = bitcast <2 x i64> %[[arg2]] to <16 x i8>
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
-! BE-IR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <2 x i64>
-! BE-IR: store <2 x i64> %[[br]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <2 x i64> %[[arg1]] to <16 x i8>
+! LLVMIR: %[[barg2:.*]] = bitcast <2 x i64> %[[arg2]] to <16 x i8>
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+! LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <2 x i64>
+! LLVMIR: store <2 x i64> %[[br]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[barg1:.*]] = bitcast <2 x i64> %[[arg1]] to <16 x i8>
+! BE-LLVMIR: %[[barg2:.*]] = bitcast <2 x i64> %[[arg2]] to <16 x i8>
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
+! BE-LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <2 x i64>
+! BE-LLVMIR: store <2 x i64> %[[br]], ptr %{{.*}}, align 16
 end subroutine vec_sldw_test_u8i2
 
 ! CHECK-LABEL: vec_sldw_test_u8i4
@@ -2379,43 +1196,21 @@ subroutine vec_sldw_test_u8i4(arg1, arg2)
   vector(unsigned(8)) :: arg1, arg2, r
   r = vec_sldw(arg1, arg2, 3_4)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<2xi64> to vector<16xi8>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<2xi64> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[barg2]], %[[barg1]] [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<2xi64>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<2xi64>) -> !fir.vector<2:ui64>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-
-! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <2 x i64> %[[arg1]] to <16 x i8>
-! CHECK: %[[barg2:.*]] = bitcast <2 x i64> %[[arg2]] to <16 x i8>
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
-! CHECK: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <2 x i64>
-! CHECK: store <2 x i64> %[[br]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! BE-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<2xi64> to vector<16xi8>
-! BE-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<2xi64> to vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<2xi64>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<2xi64>) -> !fir.vector<2:ui64>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-
-! BE-IR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! BE-IR: %[[barg1:.*]] = bitcast <2 x i64> %[[arg1]] to <16 x i8>
-! BE-IR: %[[barg2:.*]] = bitcast <2 x i64> %[[arg2]] to <16 x i8>
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
-! BE-IR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <2 x i64>
-! BE-IR: store <2 x i64> %[[br]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <2 x i64> %[[arg1]] to <16 x i8>
+! LLVMIR: %[[barg2:.*]] = bitcast <2 x i64> %[[arg2]] to <16 x i8>
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+! LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <2 x i64>
+! LLVMIR: store <2 x i64> %[[br]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[barg1:.*]] = bitcast <2 x i64> %[[arg1]] to <16 x i8>
+! BE-LLVMIR: %[[barg2:.*]] = bitcast <2 x i64> %[[arg2]] to <16 x i8>
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
+! BE-LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <2 x i64>
+! BE-LLVMIR: store <2 x i64> %[[br]], ptr %{{.*}}, align 16
 end subroutine vec_sldw_test_u8i4
 
 ! CHECK-LABEL: vec_sldw_test_u8i8
@@ -2423,43 +1218,21 @@ subroutine vec_sldw_test_u8i8(arg1, arg2)
   vector(unsigned(8)) :: arg1, arg2, r
   r = vec_sldw(arg1, arg2, 3_8)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<2xi64> to vector<16xi8>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<2xi64> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[barg2]], %[[barg1]] [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<2xi64>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<2xi64>) -> !fir.vector<2:ui64>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-
-! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <2 x i64> %[[arg1]] to <16 x i8>
-! CHECK: %[[barg2:.*]] = bitcast <2 x i64> %[[arg2]] to <16 x i8>
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
-! CHECK: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <2 x i64>
-! CHECK: store <2 x i64> %[[br]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! BE-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<2xi64> to vector<16xi8>
-! BE-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<2xi64> to vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<2xi64>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<2xi64>) -> !fir.vector<2:ui64>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-
-! BE-IR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! BE-IR: %[[barg1:.*]] = bitcast <2 x i64> %[[arg1]] to <16 x i8>
-! BE-IR: %[[barg2:.*]] = bitcast <2 x i64> %[[arg2]] to <16 x i8>
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
-! BE-IR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <2 x i64>
-! BE-IR: store <2 x i64> %[[br]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <2 x i64> %[[arg1]] to <16 x i8>
+! LLVMIR: %[[barg2:.*]] = bitcast <2 x i64> %[[arg2]] to <16 x i8>
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+! LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <2 x i64>
+! LLVMIR: store <2 x i64> %[[br]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[barg1:.*]] = bitcast <2 x i64> %[[arg1]] to <16 x i8>
+! BE-LLVMIR: %[[barg2:.*]] = bitcast <2 x i64> %[[arg2]] to <16 x i8>
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
+! BE-LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <2 x i64>
+! BE-LLVMIR: store <2 x i64> %[[br]], ptr %{{.*}}, align 16
 end subroutine vec_sldw_test_u8i8
 
 ! CHECK-LABEL: vec_sldw_test_r4i1
@@ -2467,43 +1240,21 @@ subroutine vec_sldw_test_r4i1(arg1, arg2)
   vector(real(4)) :: arg1, arg2, r
   r = vec_sldw(arg1, arg2, 3_1)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<4xf32> to vector<16xi8>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<4xf32> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[barg2]], %[[barg1]] [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<4xf32>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-
-! CHECK: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <4 x float> %[[arg1]] to <16 x i8>
-! CHECK: %[[barg2:.*]] = bitcast <4 x float> %[[arg2]] to <16 x i8>
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
-! CHECK: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x float>
-! CHECK: store <4 x float> %[[br]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! BE-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<4xf32> to vector<16xi8>
-! BE-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<4xf32> to vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<4xf32>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-
-! BE-IR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! BE-IR: %[[barg1:.*]] = bitcast <4 x float> %[[arg1]] to <16 x i8>
-! BE-IR: %[[barg2:.*]] = bitcast <4 x float> %[[arg2]] to <16 x i8>
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
-! BE-IR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x float>
-! BE-IR: store <4 x float> %[[br]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <4 x float> %[[arg1]] to <16 x i8>
+! LLVMIR: %[[barg2:.*]] = bitcast <4 x float> %[[arg2]] to <16 x i8>
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+! LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x float>
+! LLVMIR: store <4 x float> %[[br]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[barg1:.*]] = bitcast <4 x float> %[[arg1]] to <16 x i8>
+! BE-LLVMIR: %[[barg2:.*]] = bitcast <4 x float> %[[arg2]] to <16 x i8>
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
+! BE-LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x float>
+! BE-LLVMIR: store <4 x float> %[[br]], ptr %{{.*}}, align 16
 end subroutine vec_sldw_test_r4i1
 
 ! CHECK-LABEL: vec_sldw_test_r4i2
@@ -2511,43 +1262,21 @@ subroutine vec_sldw_test_r4i2(arg1, arg2)
   vector(real(4)) :: arg1, arg2, r
   r = vec_sldw(arg1, arg2, 3_2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<4xf32> to vector<16xi8>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<4xf32> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[barg2]], %[[barg1]] [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<4xf32>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-
-! CHECK: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <4 x float> %[[arg1]] to <16 x i8>
-! CHECK: %[[barg2:.*]] = bitcast <4 x float> %[[arg2]] to <16 x i8>
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
-! CHECK: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x float>
-! CHECK: store <4 x float> %[[br]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! BE-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<4xf32> to vector<16xi8>
-! BE-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<4xf32> to vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<4xf32>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-
-! BE-IR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! BE-IR: %[[barg1:.*]] = bitcast <4 x float> %[[arg1]] to <16 x i8>
-! BE-IR: %[[barg2:.*]] = bitcast <4 x float> %[[arg2]] to <16 x i8>
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
-! BE-IR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x float>
-! BE-IR: store <4 x float> %[[br]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <4 x float> %[[arg1]] to <16 x i8>
+! LLVMIR: %[[barg2:.*]] = bitcast <4 x float> %[[arg2]] to <16 x i8>
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+! LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x float>
+! LLVMIR: store <4 x float> %[[br]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[barg1:.*]] = bitcast <4 x float> %[[arg1]] to <16 x i8>
+! BE-LLVMIR: %[[barg2:.*]] = bitcast <4 x float> %[[arg2]] to <16 x i8>
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
+! BE-LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x float>
+! BE-LLVMIR: store <4 x float> %[[br]], ptr %{{.*}}, align 16
 end subroutine vec_sldw_test_r4i2
 
 ! CHECK-LABEL: vec_sldw_test_r4i4
@@ -2555,43 +1284,21 @@ subroutine vec_sldw_test_r4i4(arg1, arg2)
   vector(real(4)) :: arg1, arg2, r
   r = vec_sldw(arg1, arg2, 3_4)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<4xf32> to vector<16xi8>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<4xf32> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[barg2]], %[[barg1]] [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<4xf32>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-
-! CHECK: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <4 x float> %[[arg1]] to <16 x i8>
-! CHECK: %[[barg2:.*]] = bitcast <4 x float> %[[arg2]] to <16 x i8>
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
-! CHECK: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x float>
-! CHECK: store <4 x float> %[[br]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! BE-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<4xf32> to vector<16xi8>
-! BE-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<4xf32> to vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<4xf32>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-
-! BE-IR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! BE-IR: %[[barg1:.*]] = bitcast <4 x float> %[[arg1]] to <16 x i8>
-! BE-IR: %[[barg2:.*]] = bitcast <4 x float> %[[arg2]] to <16 x i8>
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
-! BE-IR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x float>
-! BE-IR: store <4 x float> %[[br]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <4 x float> %[[arg1]] to <16 x i8>
+! LLVMIR: %[[barg2:.*]] = bitcast <4 x float> %[[arg2]] to <16 x i8>
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+! LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x float>
+! LLVMIR: store <4 x float> %[[br]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[barg1:.*]] = bitcast <4 x float> %[[arg1]] to <16 x i8>
+! BE-LLVMIR: %[[barg2:.*]] = bitcast <4 x float> %[[arg2]] to <16 x i8>
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
+! BE-LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x float>
+! BE-LLVMIR: store <4 x float> %[[br]], ptr %{{.*}}, align 16
 end subroutine vec_sldw_test_r4i4
 
 ! CHECK-LABEL: vec_sldw_test_r4i8
@@ -2599,43 +1306,21 @@ subroutine vec_sldw_test_r4i8(arg1, arg2)
   vector(real(4)) :: arg1, arg2, r
   r = vec_sldw(arg1, arg2, 3_8)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<4xf32> to vector<16xi8>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<4xf32> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[barg2]], %[[barg1]] [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<4xf32>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-
-! CHECK: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <4 x float> %[[arg1]] to <16 x i8>
-! CHECK: %[[barg2:.*]] = bitcast <4 x float> %[[arg2]] to <16 x i8>
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
-! CHECK: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x float>
-! CHECK: store <4 x float> %[[br]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! BE-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<4xf32> to vector<16xi8>
-! BE-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<4xf32> to vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<4xf32>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-
-! BE-IR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! BE-IR: %[[barg1:.*]] = bitcast <4 x float> %[[arg1]] to <16 x i8>
-! BE-IR: %[[barg2:.*]] = bitcast <4 x float> %[[arg2]] to <16 x i8>
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
-! BE-IR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x float>
-! BE-IR: store <4 x float> %[[br]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <4 x float> %[[arg1]] to <16 x i8>
+! LLVMIR: %[[barg2:.*]] = bitcast <4 x float> %[[arg2]] to <16 x i8>
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+! LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x float>
+! LLVMIR: store <4 x float> %[[br]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[barg1:.*]] = bitcast <4 x float> %[[arg1]] to <16 x i8>
+! BE-LLVMIR: %[[barg2:.*]] = bitcast <4 x float> %[[arg2]] to <16 x i8>
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
+! BE-LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <4 x float>
+! BE-LLVMIR: store <4 x float> %[[br]], ptr %{{.*}}, align 16
 end subroutine vec_sldw_test_r4i8
 
 ! CHECK-LABEL: vec_sldw_test_r8i1
@@ -2643,43 +1328,21 @@ subroutine vec_sldw_test_r8i1(arg1, arg2)
   vector(real(8)) :: arg1, arg2, r
   r = vec_sldw(arg1, arg2, 3_1)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<2xf64> to vector<16xi8>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<2xf64> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[barg2]], %[[barg1]] [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<2xf64>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<2xf64>) -> !fir.vector<2:f64>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-
-! CHECK: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <2 x double> %[[arg1]] to <16 x i8>
-! CHECK: %[[barg2:.*]] = bitcast <2 x double> %[[arg2]] to <16 x i8>
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
-! CHECK: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <2 x double>
-! CHECK: store <2 x double> %[[br]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! BE-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<2xf64> to vector<16xi8>
-! BE-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<2xf64> to vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<2xf64>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<2xf64>) -> !fir.vector<2:f64>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-
-! BE-IR: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16
-! BE-IR: %[[barg1:.*]] = bitcast <2 x double> %[[arg1]] to <16 x i8>
-! BE-IR: %[[barg2:.*]] = bitcast <2 x double> %[[arg2]] to <16 x i8>
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
-! BE-IR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <2 x double>
-! BE-IR: store <2 x double> %[[br]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <2 x double> %[[arg1]] to <16 x i8>
+! LLVMIR: %[[barg2:.*]] = bitcast <2 x double> %[[arg2]] to <16 x i8>
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+! LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <2 x double>
+! LLVMIR: store <2 x double> %[[br]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[barg1:.*]] = bitcast <2 x double> %[[arg1]] to <16 x i8>
+! BE-LLVMIR: %[[barg2:.*]] = bitcast <2 x double> %[[arg2]] to <16 x i8>
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
+! BE-LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <2 x double>
+! BE-LLVMIR: store <2 x double> %[[br]], ptr %{{.*}}, align 16
 end subroutine vec_sldw_test_r8i1
 
 ! CHECK-LABEL: vec_sldw_test_r8i2
@@ -2687,43 +1350,21 @@ subroutine vec_sldw_test_r8i2(arg1, arg2)
   vector(real(8)) :: arg1, arg2, r
   r = vec_sldw(arg1, arg2, 3_2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<2xf64> to vector<16xi8>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<2xf64> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[barg2]], %[[barg1]] [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<2xf64>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<2xf64>) -> !fir.vector<2:f64>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-
-! CHECK: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <2 x double> %[[arg1]] to <16 x i8>
-! CHECK: %[[barg2:.*]] = bitcast <2 x double> %[[arg2]] to <16 x i8>
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
-! CHECK: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <2 x double>
-! CHECK: store <2 x double> %[[br]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! BE-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<2xf64> to vector<16xi8>
-! BE-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<2xf64> to vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<2xf64>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<2xf64>) -> !fir.vector<2:f64>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-
-! BE-IR: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16
-! BE-IR: %[[barg1:.*]] = bitcast <2 x double> %[[arg1]] to <16 x i8>
-! BE-IR: %[[barg2:.*]] = bitcast <2 x double> %[[arg2]] to <16 x i8>
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
-! BE-IR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <2 x double>
-! BE-IR: store <2 x double> %[[br]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <2 x double> %[[arg1]] to <16 x i8>
+! LLVMIR: %[[barg2:.*]] = bitcast <2 x double> %[[arg2]] to <16 x i8>
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+! LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <2 x double>
+! LLVMIR: store <2 x double> %[[br]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[barg1:.*]] = bitcast <2 x double> %[[arg1]] to <16 x i8>
+! BE-LLVMIR: %[[barg2:.*]] = bitcast <2 x double> %[[arg2]] to <16 x i8>
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
+! BE-LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <2 x double>
+! BE-LLVMIR: store <2 x double> %[[br]], ptr %{{.*}}, align 16
 end subroutine vec_sldw_test_r8i2
 
 ! CHECK-LABEL: vec_sldw_test_r8i4
@@ -2731,43 +1372,21 @@ subroutine vec_sldw_test_r8i4(arg1, arg2)
   vector(real(8)) :: arg1, arg2, r
   r = vec_sldw(arg1, arg2, 3_4)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<2xf64> to vector<16xi8>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<2xf64> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[barg2]], %[[barg1]] [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<2xf64>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<2xf64>) -> !fir.vector<2:f64>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-
-! CHECK: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <2 x double> %[[arg1]] to <16 x i8>
-! CHECK: %[[barg2:.*]] = bitcast <2 x double> %[[arg2]] to <16 x i8>
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
-! CHECK: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <2 x double>
-! CHECK: store <2 x double> %[[br]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! BE-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<2xf64> to vector<16xi8>
-! BE-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<2xf64> to vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<2xf64>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<2xf64>) -> !fir.vector<2:f64>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-
-! BE-IR: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16
-! BE-IR: %[[barg1:.*]] = bitcast <2 x double> %[[arg1]] to <16 x i8>
-! BE-IR: %[[barg2:.*]] = bitcast <2 x double> %[[arg2]] to <16 x i8>
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
-! BE-IR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <2 x double>
-! BE-IR: store <2 x double> %[[br]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <2 x double> %[[arg1]] to <16 x i8>
+! LLVMIR: %[[barg2:.*]] = bitcast <2 x double> %[[arg2]] to <16 x i8>
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+! LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <2 x double>
+! LLVMIR: store <2 x double> %[[br]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[barg1:.*]] = bitcast <2 x double> %[[arg1]] to <16 x i8>
+! BE-LLVMIR: %[[barg2:.*]] = bitcast <2 x double> %[[arg2]] to <16 x i8>
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
+! BE-LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <2 x double>
+! BE-LLVMIR: store <2 x double> %[[br]], ptr %{{.*}}, align 16
 end subroutine vec_sldw_test_r8i4
 
 ! CHECK-LABEL: vec_sldw_test_r8i8
@@ -2775,41 +1394,19 @@ subroutine vec_sldw_test_r8i8(arg1, arg2)
   vector(real(8)) :: arg1, arg2, r
   r = vec_sldw(arg1, arg2, 3_8)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! CHECK-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! CHECK-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<2xf64> to vector<16xi8>
-! CHECK-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<2xf64> to vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = vector.shuffle %[[barg2]], %[[barg1]] [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] : vector<16xi8>, vector<16xi8>
-! CHECK-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<2xf64>
-! CHECK-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<2xf64>) -> !fir.vector<2:f64>
-! CHECK-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-
-! CHECK: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16
-! CHECK: %[[barg1:.*]] = bitcast <2 x double> %[[arg1]] to <16 x i8>
-! CHECK: %[[barg2:.*]] = bitcast <2 x double> %[[arg2]] to <16 x i8>
-! CHECK: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
-! CHECK: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <2 x double>
-! CHECK: store <2 x double> %[[br]], ptr %{{.*}}, align 16
-
-! BE-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! BE-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! BE-FIR: %[[carg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! BE-FIR: %[[carg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! BE-FIR: %[[barg1:.*]] = llvm.bitcast %[[carg1]] : vector<2xf64> to vector<16xi8>
-! BE-FIR: %[[barg2:.*]] = llvm.bitcast %[[carg2]] : vector<2xf64> to vector<16xi8>
-! BE-FIR: %[[r:.*]] = vector.shuffle %[[barg1]], %[[barg2]] [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] : vector<16xi8>, vector<16xi8>
-! BE-FIR: %[[br:.*]] = llvm.bitcast %[[r]] : vector<16xi8> to vector<2xf64>
-! BE-FIR: %[[cr:.*]] = fir.convert %[[br]] : (vector<2xf64>) -> !fir.vector<2:f64>
-! BE-FIR: fir.store %[[cr]] to %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-
-! BE-IR: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16
-! BE-IR: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16
-! BE-IR: %[[barg1:.*]] = bitcast <2 x double> %[[arg1]] to <16 x i8>
-! BE-IR: %[[barg2:.*]] = bitcast <2 x double> %[[arg2]] to <16 x i8>
-! BE-IR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
-! BE-IR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <2 x double>
-! BE-IR: store <2 x double> %[[br]], ptr %{{.*}}, align 16
+! LLVMIR: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! LLVMIR: %[[barg1:.*]] = bitcast <2 x double> %[[arg1]] to <16 x i8>
+! LLVMIR: %[[barg2:.*]] = bitcast <2 x double> %[[arg2]] to <16 x i8>
+! LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg2]], <16 x i8> %[[barg1]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+! LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <2 x double>
+! LLVMIR: store <2 x double> %[[br]], ptr %{{.*}}, align 16
+
+! BE-LLVMIR: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16
+! BE-LLVMIR: %[[barg1:.*]] = bitcast <2 x double> %[[arg1]] to <16 x i8>
+! BE-LLVMIR: %[[barg2:.*]] = bitcast <2 x double> %[[arg2]] to <16 x i8>
+! BE-LLVMIR: %[[r:.*]] = shufflevector <16 x i8> %[[barg1]], <16 x i8> %[[barg2]], <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
+! BE-LLVMIR: %[[br:.*]] = bitcast <16 x i8> %[[r]] to <2 x double>
+! BE-LLVMIR: store <2 x double> %[[br]], ptr %{{.*}}, align 16
 end subroutine vec_sldw_test_r8i8
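
For reference, the shuffle masks checked above follow directly from the vec_sldw semantics: the two 16-byte operands are concatenated and shifted left by 4*c bytes, so for c = 3 the big-endian lowering selects bytes 12..27 of arg1:arg2, while the little-endian lowering swaps the operands and mirrors the window to bytes 4..19 of arg2:arg1. A minimal C sketch of the index math (illustrative only; the names are not part of the flang sources):

    #include <stdio.h>

    int main(void) {
      int c = 3;                 /* shift count used by every test above */
      int be_start = 4 * c;      /* big endian: bytes 12..27 of arg1:arg2 */
      int le_start = 16 - 4 * c; /* little endian: bytes 4..19 of arg2:arg1 */
      for (int i = 0; i < 16; ++i)
        printf("BE %2d  LE %2d\n", be_start + i, le_start + i);
      return 0;
    }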

diff --git a/flang/test/Lower/PowerPC/ppc-vec-shift.f90 b/flang/test/Lower/PowerPC/ppc-vec-shift.f90
index 4608e6b6f1a4537..a20f086c769adc2 100644
--- a/flang/test/Lower/PowerPC/ppc-vec-shift.f90
+++ b/flang/test/Lower/PowerPC/ppc-vec-shift.f90
@@ -1,6 +1,4 @@
-! RUN: bbc -emit-fir %s -o - | FileCheck --check-prefixes="CHECK-FIR" %s
-! RUN: %flang_fc1 -emit-fir %s -o - | fir-opt --fir-to-llvm-ir | FileCheck --check-prefixes="CHECK-LLVMIR" %s
-! RUN: %flang_fc1 -emit-llvm %s -o - | FileCheck --check-prefixes="CHECK" %s
+! RUN: %flang_fc1 -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes="LLVMIR" %s
 ! REQUIRES: target=powerpc{{.*}}
 
 !----------------------
@@ -13,26 +11,10 @@ subroutine vec_sl_i1(arg1, arg2)
   vector(unsigned(1)) :: arg2
   r = vec_sl(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %c8_i8 = arith.constant 8 : i8
-! CHECK-FIR: %[[cv:.*]] = vector.broadcast %c8_i8 : i8 to vector<16xi8>
-! CHECK-FIR: %[[msk:.*]] = arith.remui %[[varg2]], %[[cv]] : vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = arith.shli %[[varg1]], %[[msk]] : vector<16xi8>
-! CHECK-FIR: %{{[0-9]}} = fir.convert %[[r]] : (vector<16xi8>) -> !fir.vector<16:i8>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[cv:.*]] = llvm.mlir.constant(dense<8> : vector<16xi8>) : vector<16xi8>
-! CHECK-LLVMIR: %[[msk:.*]] = llvm.urem %[[arg2]], %[[cv]]  : vector<16xi8>
-! CHECK-LLVMIR: %{{[0-9]}} = llvm.shl %[[arg1]], %[[msk]]  : vector<16xi8>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[msk:.*]] = urem <16 x i8> %[[arg2]], <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
-! CHECK: %7 = shl <16 x i8> %[[arg1]], %[[msk]]
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[msk:.*]] = urem <16 x i8> %[[arg2]], <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
+! LLVMIR: %7 = shl <16 x i8> %[[arg1]], %[[msk]]
 end subroutine vec_sl_i1
 
 ! CHECK-LABEL: vec_sl_i2
@@ -41,26 +23,10 @@ subroutine vec_sl_i2(arg1, arg2)
   vector(unsigned(2)) :: arg2
   r = vec_sl(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %c16_i16 = arith.constant 16 : i16
-! CHECK-FIR: %[[cv:.*]] = vector.broadcast %c16_i16 : i16 to vector<8xi16>
-! CHECK-FIR: %[[msk:.*]] = arith.remui %[[varg2]], %[[cv]] : vector<8xi16>
-! CHECK-FIR: %[[r:.*]] = arith.shli %[[varg1]], %[[msk]] : vector<8xi16>
-! CHECK-FIR: %{{[0-9]}} = fir.convert %[[r]] : (vector<8xi16>) -> !fir.vector<8:i16>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[cv:.*]] = llvm.mlir.constant(dense<16> : vector<8xi16>) : vector<8xi16>
-! CHECK-LLVMIR: %[[msk:.*]] = llvm.urem %[[arg2]], %[[cv]]  : vector<8xi16>
-! CHECK-LLVMIR: %{{[0-9]}} = llvm.shl %[[arg1]], %[[msk]]  : vector<8xi16>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[msk:.*]] = urem <8 x i16> %[[arg2]], <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-! CHECK: %7 = shl <8 x i16> %[[arg1]], %[[msk]]
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[msk:.*]] = urem <8 x i16> %[[arg2]], <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+! LLVMIR: %7 = shl <8 x i16> %[[arg1]], %[[msk]]
 end subroutine vec_sl_i2
 
 ! CHECK-LABEL: vec_sl_i4
@@ -69,26 +35,10 @@ subroutine vec_sl_i4(arg1, arg2)
   vector(unsigned(4)) :: arg2
   r = vec_sl(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %c32_i32 = arith.constant 32 : i32
-! CHECK-FIR: %[[cv:.*]] = vector.broadcast %c32_i32 : i32 to vector<4xi32>
-! CHECK-FIR: %[[msk:.*]] = arith.remui %[[varg2]], %[[cv]] : vector<4xi32>
-! CHECK-FIR: %[[r:.*]] = arith.shli %[[varg1]], %[[msk]] : vector<4xi32>
-! CHECK-FIR: %{{[0-9]}} = fir.convert %[[r]] : (vector<4xi32>) -> !fir.vector<4:i32>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[cv:.*]] = llvm.mlir.constant(dense<32> : vector<4xi32>) : vector<4xi32>
-! CHECK-LLVMIR: %[[msk:.*]] = llvm.urem %[[arg2]], %[[cv]]  : vector<4xi32>
-! CHECK-LLVMIR: %{{[0-9]}} = llvm.shl %[[arg1]], %[[msk]]  : vector<4xi32>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[msk:.*]] = urem <4 x i32> %[[arg2]], <i32 32, i32 32, i32 32, i32 32>
-! CHECK: %7 = shl <4 x i32> %[[arg1]], %[[msk]]
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[msk:.*]] = urem <4 x i32> %[[arg2]], <i32 32, i32 32, i32 32, i32 32>
+! LLVMIR: %7 = shl <4 x i32> %[[arg1]], %[[msk]]
 end subroutine vec_sl_i4
 
 ! CHECK-LABEL: vec_sl_i8
@@ -97,26 +47,10 @@ subroutine vec_sl_i8(arg1, arg2)
   vector(unsigned(8)) :: arg2
   r = vec_sl(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %c64_i64 = arith.constant 64 : i64
-! CHECK-FIR: %[[cv:.*]] = vector.broadcast %c64_i64 : i64 to vector<2xi64>
-! CHECK-FIR: %[[msk:.*]] = arith.remui %[[varg2]], %[[cv]] : vector<2xi64>
-! CHECK-FIR: %[[r:.*]] = arith.shli %[[varg1]], %[[msk]] : vector<2xi64>
-! CHECK-FIR: %{{[0-9]}} = fir.convert %[[r]] : (vector<2xi64>) -> !fir.vector<2:i64>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[cv:.*]] = llvm.mlir.constant(dense<64> : vector<2xi64>) : vector<2xi64>
-! CHECK-LLVMIR: %[[msk:.*]] = llvm.urem %[[arg2]], %[[cv]]  : vector<2xi64>
-! CHECK-LLVMIR: %{{[0-9]}} = llvm.shl %[[arg1]], %[[msk]]  : vector<2xi64>
-
-! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[msk:.*]] = urem <2 x i64> %[[arg2]], <i64 64, i64 64>
-! CHECK: %7 = shl <2 x i64> %[[arg1]], %[[msk]]
+! LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[msk:.*]] = urem <2 x i64> %[[arg2]], <i64 64, i64 64>
+! LLVMIR: %7 = shl <2 x i64> %[[arg1]], %[[msk]]
 end subroutine vec_sl_i8
 
 ! CHECK-LABEL: vec_sl_u1
@@ -125,26 +59,10 @@ subroutine vec_sl_u1(arg1, arg2)
   vector(unsigned(1)) :: arg2
   r = vec_sl(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %c8_i8 = arith.constant 8 : i8
-! CHECK-FIR: %[[cv:.*]] = vector.broadcast %c8_i8 : i8 to vector<16xi8>
-! CHECK-FIR: %[[msk:.*]] = arith.remui %[[varg2]], %[[cv]] : vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = arith.shli %[[varg1]], %[[msk]] : vector<16xi8>
-! CHECK-FIR: %{{[0-9]}} = fir.convert %[[r]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[cv:.*]] = llvm.mlir.constant(dense<8> : vector<16xi8>) : vector<16xi8>
-! CHECK-LLVMIR: %[[msk:.*]] = llvm.urem %[[arg2]], %[[cv]]  : vector<16xi8>
-! CHECK-LLVMIR: %{{[0-9]}} = llvm.shl %[[arg1]], %[[msk]]  : vector<16xi8>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[msk:.*]] = urem <16 x i8> %[[arg2]], <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
-! CHECK: %7 = shl <16 x i8> %[[arg1]], %[[msk]]
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[msk:.*]] = urem <16 x i8> %[[arg2]], <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
+! LLVMIR: %7 = shl <16 x i8> %[[arg1]], %[[msk]]
 end subroutine vec_sl_u1
 
 ! CHECK-LABEL: vec_sl_u2
@@ -153,26 +71,10 @@ subroutine vec_sl_u2(arg1, arg2)
   vector(unsigned(2)) :: arg2
   r = vec_sl(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %c16_i16 = arith.constant 16 : i16
-! CHECK-FIR: %[[cv:.*]] = vector.broadcast %c16_i16 : i16 to vector<8xi16>
-! CHECK-FIR: %[[msk:.*]] = arith.remui %[[varg2]], %[[cv]] : vector<8xi16>
-! CHECK-FIR: %[[r:.*]] = arith.shli %[[varg1]], %[[msk]] : vector<8xi16>
-! CHECK-FIR: %{{[0-9]}} = fir.convert %[[r]] : (vector<8xi16>) -> !fir.vector<8:ui16>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[cv:.*]] = llvm.mlir.constant(dense<16> : vector<8xi16>) : vector<8xi16>
-! CHECK-LLVMIR: %[[msk:.*]] = llvm.urem %[[arg2]], %[[cv]]  : vector<8xi16>
-! CHECK-LLVMIR: %{{[0-9]}} = llvm.shl %[[arg1]], %[[msk]]  : vector<8xi16>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[msk:.*]] = urem <8 x i16> %[[arg2]], <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-! CHECK: %7 = shl <8 x i16> %[[arg1]], %[[msk]]
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[msk:.*]] = urem <8 x i16> %[[arg2]], <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+! LLVMIR: %7 = shl <8 x i16> %[[arg1]], %[[msk]]
 end subroutine vec_sl_u2
 
 ! CHECK-LABEL: vec_sl_u4
@@ -181,26 +83,10 @@ subroutine vec_sl_u4(arg1, arg2)
   vector(unsigned(4)) :: arg2
   r = vec_sl(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %c32_i32 = arith.constant 32 : i32
-! CHECK-FIR: %[[cv:.*]] = vector.broadcast %c32_i32 : i32 to vector<4xi32>
-! CHECK-FIR: %[[msk:.*]] = arith.remui %[[varg2]], %[[cv]] : vector<4xi32>
-! CHECK-FIR: %[[r:.*]] = arith.shli %[[varg1]], %[[msk]] : vector<4xi32>
-! CHECK-FIR: %{{[0-9]}} = fir.convert %[[r]] : (vector<4xi32>) -> !fir.vector<4:ui32>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[cv:.*]] = llvm.mlir.constant(dense<32> : vector<4xi32>) : vector<4xi32>
-! CHECK-LLVMIR: %[[msk:.*]] = llvm.urem %[[arg2]], %[[cv]]  : vector<4xi32>
-! CHECK-LLVMIR: %{{[0-9]}} = llvm.shl %[[arg1]], %[[msk]]  : vector<4xi32>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[msk:.*]] = urem <4 x i32> %[[arg2]], <i32 32, i32 32, i32 32, i32 32>
-! CHECK: %7 = shl <4 x i32> %[[arg1]], %[[msk]]
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[msk:.*]] = urem <4 x i32> %[[arg2]], <i32 32, i32 32, i32 32, i32 32>
+! LLVMIR: %7 = shl <4 x i32> %[[arg1]], %[[msk]]
 end subroutine vec_sl_u4
 
 ! CHECK-LABEL: vec_sl_u8
@@ -209,26 +95,10 @@ subroutine vec_sl_u8(arg1, arg2)
   vector(unsigned(8)) :: arg2
   r = vec_sl(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %c64_i64 = arith.constant 64 : i64
-! CHECK-FIR: %[[cv:.*]] = vector.broadcast %c64_i64 : i64 to vector<2xi64>
-! CHECK-FIR: %[[msk:.*]] = arith.remui %[[varg2]], %[[cv]] : vector<2xi64>
-! CHECK-FIR: %[[r:.*]] = arith.shli %[[varg1]], %[[msk]] : vector<2xi64>
-! CHECK-FIR: %{{[0-9]}} = fir.convert %[[r]] : (vector<2xi64>) -> !fir.vector<2:ui64>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[cv:.*]] = llvm.mlir.constant(dense<64> : vector<2xi64>) : vector<2xi64>
-! CHECK-LLVMIR: %[[msk:.*]] = llvm.urem %[[arg2]], %[[cv]]  : vector<2xi64>
-! CHECK-LLVMIR: %{{[0-9]}} = llvm.shl %[[arg1]], %[[msk]]  : vector<2xi64>
-
-! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[msk:.*]] = urem <2 x i64> %[[arg2]], <i64 64, i64 64>
-! CHECK: %{{[0-9]+}} = shl <2 x i64> %[[arg1]], %[[msk]]
+! LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[msk:.*]] = urem <2 x i64> %[[arg2]], <i64 64, i64 64>
+! LLVMIR: %{{[0-9]+}} = shl <2 x i64> %[[arg1]], %[[msk]]
 end subroutine vec_sl_u8
 
 !----------------------
@@ -240,30 +110,12 @@ subroutine vec_sll_i1u1(arg1, arg2)
   vector(unsigned(1)) :: arg2
   r = vec_sll(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[bc1:.*]] = vector.bitcast %[[varg1]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[bc2:.*]] = vector.bitcast %[[varg2]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vsl(%[[bc1]], %[[bc2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcres:.*]] = vector.bitcast %[[vres]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcres]] : (vector<16xi8>) -> !fir.vector<16:i8>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[varg1:.*]] = llvm.bitcast %[[arg1]] : vector<16xi8> to vector<4xi32>
-! CHECK-LLVMIR: %[[varg2:.*]] = llvm.bitcast %[[arg2]] : vector<16xi8> to vector<4xi32>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vsl(%[[varg1]], %[[varg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[res]] : vector<4xi32> to vector<16xi8>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[varg1:.*]] = bitcast <16 x i8> %[[arg1]] to <4 x i32>
-! CHECK: %[[varg2:.*]] = bitcast <16 x i8> %[[arg2]] to <4 x i32>
-! CHECK: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsl(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
-! CHECK: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <16 x i8>
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[varg1:.*]] = bitcast <16 x i8> %[[arg1]] to <4 x i32>
+! LLVMIR: %[[varg2:.*]] = bitcast <16 x i8> %[[arg2]] to <4 x i32>
+! LLVMIR: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsl(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
+! LLVMIR: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <16 x i8>
 end subroutine vec_sll_i1u1
 
 ! CHECK-LABEL: vec_sll_i2u1
@@ -272,30 +124,12 @@ subroutine vec_sll_i2u1(arg1, arg2)
   vector(unsigned(1)) :: arg2
   r = vec_sll(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[bc1:.*]] = vector.bitcast %[[varg1]] : vector<8xi16> to vector<4xi32>
-! CHECK-FIR: %[[bc2:.*]] = vector.bitcast %[[varg2]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vsl(%[[bc1]], %[[bc2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcres:.*]] = vector.bitcast %[[vres]] : vector<4xi32> to vector<8xi16>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcres]] : (vector<8xi16>) -> !fir.vector<8:i16>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[varg1:.*]] = llvm.bitcast %[[arg1]] : vector<8xi16> to vector<4xi32>
-! CHECK-LLVMIR: %[[varg2:.*]] = llvm.bitcast %[[arg2]] : vector<16xi8> to vector<4xi32>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vsl(%[[varg1]], %[[varg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[res]] : vector<4xi32> to vector<8xi16>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[varg1:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32>
-! CHECK: %[[varg2:.*]] = bitcast <16 x i8> %[[arg2]] to <4 x i32>
-! CHECK: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsl(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
-! CHECK: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <8 x i16>
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[varg1:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32>
+! LLVMIR: %[[varg2:.*]] = bitcast <16 x i8> %[[arg2]] to <4 x i32>
+! LLVMIR: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsl(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
+! LLVMIR: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <8 x i16>
 end subroutine vec_sll_i2u1
 
 ! CHECK-LABEL: vec_sll_i4u1
@@ -304,22 +138,10 @@ subroutine vec_sll_i4u1(arg1, arg2)
   vector(unsigned(1)) :: arg2
   r = vec_sll(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[bc2:.*]] = vector.bitcast %[[varg2]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vsl(%[[varg1]], %[[bc2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[varg2:.*]] = llvm.bitcast %[[arg2]] : vector<16xi8> to vector<4xi32>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vsl(%[[arg1]], %[[varg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[varg2:.*]] = bitcast <16 x i8> %[[arg2]] to <4 x i32>
-! CHECK: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsl(<4 x i32> %[[arg1]], <4 x i32> %[[varg2]])
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[varg2:.*]] = bitcast <16 x i8> %[[arg2]] to <4 x i32>
+! LLVMIR: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsl(<4 x i32> %[[arg1]], <4 x i32> %[[varg2]])
 end subroutine vec_sll_i4u1
 
 ! CHECK-LABEL: vec_sll_i1u2
@@ -328,30 +150,12 @@ subroutine vec_sll_i1u2(arg1, arg2)
   vector(unsigned(2)) :: arg2
   r = vec_sll(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[bc1:.*]] = vector.bitcast %[[varg1]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[bc2:.*]] = vector.bitcast %[[varg2]] : vector<8xi16> to vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vsl(%[[bc1]], %[[bc2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcres:.*]] = vector.bitcast %[[vres]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcres]] : (vector<16xi8>) -> !fir.vector<16:i8>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[varg1:.*]] = llvm.bitcast %[[arg1]] : vector<16xi8> to vector<4xi32>
-! CHECK-LLVMIR: %[[varg2:.*]] = llvm.bitcast %[[arg2]] : vector<8xi16> to vector<4xi32>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vsl(%[[varg1]], %[[varg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-! CHECK-LLVMIR:    %{{[0-9]+}} = llvm.bitcast %[[res]] : vector<4xi32> to vector<16xi8>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[varg1:.*]] = bitcast <16 x i8> %[[arg1]] to <4 x i32>
-! CHECK: %[[varg2:.*]] = bitcast <8 x i16> %[[arg2]] to <4 x i32>
-! CHECK: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsl(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
-! CHECK: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <16 x i8>
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[varg1:.*]] = bitcast <16 x i8> %[[arg1]] to <4 x i32>
+! LLVMIR: %[[varg2:.*]] = bitcast <8 x i16> %[[arg2]] to <4 x i32>
+! LLVMIR: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsl(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
+! LLVMIR: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <16 x i8>
 end subroutine vec_sll_i1u2
 
 ! CHECK-LABEL: vec_sll_i2u2
@@ -360,30 +164,12 @@ subroutine vec_sll_i2u2(arg1, arg2)
   vector(unsigned(2)) :: arg2
   r = vec_sll(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[bc1:.*]] = vector.bitcast %[[varg1]] : vector<8xi16> to vector<4xi32>
-! CHECK-FIR: %[[bc2:.*]] = vector.bitcast %[[varg2]] : vector<8xi16> to vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vsl(%[[bc1]], %[[bc2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcres:.*]] = vector.bitcast %[[vres]] : vector<4xi32> to vector<8xi16>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcres]] : (vector<8xi16>) -> !fir.vector<8:i16>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[varg1:.*]] = llvm.bitcast %[[arg1]] : vector<8xi16> to vector<4xi32>
-! CHECK-LLVMIR: %[[varg2:.*]] = llvm.bitcast %[[arg2]] : vector<8xi16> to vector<4xi32>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vsl(%[[varg1]], %[[varg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[res]] : vector<4xi32> to vector<8xi16>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[varg1:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32>
-! CHECK: %[[varg2:.*]] = bitcast <8 x i16> %[[arg2]] to <4 x i32>
-! CHECK: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsl(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
-! CHECK: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <8 x i16>
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[varg1:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32>
+! LLVMIR: %[[varg2:.*]] = bitcast <8 x i16> %[[arg2]] to <4 x i32>
+! LLVMIR: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsl(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
+! LLVMIR: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <8 x i16>
 end subroutine vec_sll_i2u2
 
 ! CHECK-LABEL: vec_sll_i4u2
@@ -392,22 +178,10 @@ subroutine vec_sll_i4u2(arg1, arg2)
   vector(unsigned(2)) :: arg2
   r = vec_sll(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[bc2:.*]] = vector.bitcast %[[varg2]] : vector<8xi16> to vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vsl(%[[varg1]], %[[bc2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[varg2:.*]] = llvm.bitcast %[[arg2]] : vector<8xi16> to vector<4xi32>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vsl(%[[arg1]], %[[varg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[varg2:.*]] = bitcast <8 x i16> %[[arg2]] to <4 x i32>
-! CHECK: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsl(<4 x i32> %[[arg1]], <4 x i32> %[[varg2]])
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[varg2:.*]] = bitcast <8 x i16> %[[arg2]] to <4 x i32>
+! LLVMIR: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsl(<4 x i32> %[[arg1]], <4 x i32> %[[varg2]])
 end subroutine vec_sll_i4u2
 
 ! CHECK-LABEL: vec_sll_i1u4
@@ -416,27 +190,11 @@ subroutine vec_sll_i1u4(arg1, arg2)
   vector(unsigned(4)) :: arg2
   r = vec_sll(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[bc1:.*]] = vector.bitcast %[[varg1]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vsl(%[[bc1]], %[[varg2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcres:.*]] = vector.bitcast %[[vres]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcres]] : (vector<16xi8>) -> !fir.vector<16:i8>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[varg1:.*]] = llvm.bitcast %[[arg1]] : vector<16xi8> to vector<4xi32>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vsl(%[[varg1]], %[[arg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-! CHECK-LLVMIR:    %{{[0-9]+}} = llvm.bitcast %[[res]] : vector<4xi32> to vector<16xi8>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[varg1:.*]] = bitcast <16 x i8> %[[arg1]] to <4 x i32>
-! CHECK: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsl(<4 x i32> %[[varg1]], <4 x i32> %[[arg2]])
-! CHECK: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <16 x i8>
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[varg1:.*]] = bitcast <16 x i8> %[[arg1]] to <4 x i32>
+! LLVMIR: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsl(<4 x i32> %[[varg1]], <4 x i32> %[[arg2]])
+! LLVMIR: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <16 x i8>
 end subroutine vec_sll_i1u4
 
 ! CHECK-LABEL: vec_sll_i2u4
@@ -445,27 +203,11 @@ subroutine vec_sll_i2u4(arg1, arg2)
   vector(unsigned(4)) :: arg2
   r = vec_sll(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[bc1:.*]] = vector.bitcast %[[varg1]] : vector<8xi16> to vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vsl(%[[bc1]], %[[varg2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcres:.*]] = vector.bitcast %[[vres]] : vector<4xi32> to vector<8xi16>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcres]] : (vector<8xi16>) -> !fir.vector<8:i16>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[varg1:.*]] = llvm.bitcast %[[arg1]] : vector<8xi16> to vector<4xi32>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vsl(%[[varg1]], %[[arg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[res]] : vector<4xi32> to vector<8xi16>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[varg1:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32>
-! CHECK: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsl(<4 x i32> %[[varg1]], <4 x i32> %[[arg2]])
-! CHECK: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <8 x i16>
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[varg1:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32>
+! LLVMIR: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsl(<4 x i32> %[[varg1]], <4 x i32> %[[arg2]])
+! LLVMIR: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <8 x i16>
 end subroutine vec_sll_i2u4
 
 ! CHECK-LABEL: vec_sll_i4u4
@@ -474,19 +216,9 @@ subroutine vec_sll_i4u4(arg1, arg2)
   vector(unsigned(4)) :: arg2
   r = vec_sll(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vsl(%[[varg1]], %[[varg2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vsl(%[[arg1]], %[[arg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsl(<4 x i32> %[[arg1]], <4 x i32> %[[arg2]])
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsl(<4 x i32> %[[arg1]], <4 x i32> %[[arg2]])
 end subroutine vec_sll_i4u4
 
 ! CHECK-LABEL: vec_sll_u1u1
@@ -495,30 +227,12 @@ subroutine vec_sll_u1u1(arg1, arg2)
   vector(unsigned(1)) :: arg2
   r = vec_sll(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[bc1:.*]] = vector.bitcast %[[varg1]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[bc2:.*]] = vector.bitcast %[[varg2]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vsl(%[[bc1]], %[[bc2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcres:.*]] = vector.bitcast %[[vres]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcres]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[varg1:.*]] = llvm.bitcast %[[arg1]] : vector<16xi8> to vector<4xi32>
-! CHECK-LLVMIR: %[[varg2:.*]] = llvm.bitcast %[[arg2]] : vector<16xi8> to vector<4xi32>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vsl(%[[varg1]], %[[varg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-! CHECK-LLVMIR:    %{{[0-9]+}} = llvm.bitcast %[[res]] : vector<4xi32> to vector<16xi8>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[varg1:.*]] = bitcast <16 x i8> %[[arg1]] to <4 x i32>
-! CHECK: %[[varg2:.*]] = bitcast <16 x i8> %[[arg2]] to <4 x i32>
-! CHECK: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsl(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
-! CHECK: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <16 x i8>
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[varg1:.*]] = bitcast <16 x i8> %[[arg1]] to <4 x i32>
+! LLVMIR: %[[varg2:.*]] = bitcast <16 x i8> %[[arg2]] to <4 x i32>
+! LLVMIR: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsl(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
+! LLVMIR: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <16 x i8>
 end subroutine vec_sll_u1u1
 
 ! CHECK-LABEL: vec_sll_u2u1
@@ -527,30 +241,12 @@ subroutine vec_sll_u2u1(arg1, arg2)
   vector(unsigned(1)) :: arg2
   r = vec_sll(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[bc1:.*]] = vector.bitcast %[[varg1]] : vector<8xi16> to vector<4xi32>
-! CHECK-FIR: %[[bc2:.*]] = vector.bitcast %[[varg2]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vsl(%[[bc1]], %[[bc2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcres:.*]] = vector.bitcast %[[vres]] : vector<4xi32> to vector<8xi16>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcres]] : (vector<8xi16>) -> !fir.vector<8:ui16>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[varg1:.*]] = llvm.bitcast %[[arg1]] : vector<8xi16> to vector<4xi32>
-! CHECK-LLVMIR: %[[varg2:.*]] = llvm.bitcast %[[arg2]] : vector<16xi8> to vector<4xi32>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vsl(%[[varg1]], %[[varg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[res]] : vector<4xi32> to vector<8xi16>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[varg1:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32>
-! CHECK: %[[varg2:.*]] = bitcast <16 x i8> %[[arg2]] to <4 x i32>
-! CHECK: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsl(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
-! CHECK: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <8 x i16>
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[varg1:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32>
+! LLVMIR: %[[varg2:.*]] = bitcast <16 x i8> %[[arg2]] to <4 x i32>
+! LLVMIR: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsl(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
+! LLVMIR: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <8 x i16>
 end subroutine vec_sll_u2u1
 
 ! CHECK-LABEL: vec_sll_u4u1
@@ -559,25 +255,10 @@ subroutine vec_sll_u4u1(arg1, arg2)
   vector(unsigned(1)) :: arg2
   r = vec_sll(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[bc2:.*]] = vector.bitcast %[[varg2]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vsl(%[[varg1]], %[[bc2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcres:.*]] = vector.bitcast %[[vres]] : vector<4xi32> to vector<4xi32>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcres]] : (vector<4xi32>) -> !fir.vector<4:ui32>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[varg2:.*]] = llvm.bitcast %[[arg2]] : vector<16xi8> to vector<4xi32>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.call @llvm.ppc.altivec.vsl(%[[arg1]], %[[varg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[varg2:.*]] = bitcast <16 x i8> %[[arg2]] to <4 x i32>
-! CHECK: %{{[0-9]+}} = call <4 x i32> @llvm.ppc.altivec.vsl(<4 x i32> %[[arg1]], <4 x i32> %[[varg2]])
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[varg2:.*]] = bitcast <16 x i8> %[[arg2]] to <4 x i32>
+! LLVMIR: %{{[0-9]+}} = call <4 x i32> @llvm.ppc.altivec.vsl(<4 x i32> %[[arg1]], <4 x i32> %[[varg2]])
 end subroutine vec_sll_u4u1
 
 ! CHECK-LABEL: vec_sll_u1u2
@@ -586,30 +267,12 @@ subroutine vec_sll_u1u2(arg1, arg2)
   vector(unsigned(2)) :: arg2
   r = vec_sll(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[bc1:.*]] = vector.bitcast %[[varg1]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[bc2:.*]] = vector.bitcast %[[varg2]] : vector<8xi16> to vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vsl(%[[bc1]], %[[bc2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcres:.*]] = vector.bitcast %[[vres]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcres]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[varg1:.*]] = llvm.bitcast %[[arg1]] : vector<16xi8> to vector<4xi32>
-! CHECK-LLVMIR: %[[varg2:.*]] = llvm.bitcast %[[arg2]] : vector<8xi16> to vector<4xi32>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vsl(%[[varg1]], %[[varg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-! CHECK-LLVMIR:    %{{[0-9]+}} = llvm.bitcast %[[res]] : vector<4xi32> to vector<16xi8>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[varg1:.*]] = bitcast <16 x i8> %[[arg1]] to <4 x i32>
-! CHECK: %[[varg2:.*]] = bitcast <8 x i16> %[[arg2]] to <4 x i32>
-! CHECK: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsl(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
-! CHECK: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <16 x i8>
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[varg1:.*]] = bitcast <16 x i8> %[[arg1]] to <4 x i32>
+! LLVMIR: %[[varg2:.*]] = bitcast <8 x i16> %[[arg2]] to <4 x i32>
+! LLVMIR: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsl(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
+! LLVMIR: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <16 x i8>
 end subroutine vec_sll_u1u2
 
 ! CHECK-LABEL: vec_sll_u2u2
@@ -618,30 +281,12 @@ subroutine vec_sll_u2u2(arg1, arg2)
   vector(unsigned(2)) :: arg2
   r = vec_sll(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[bc1:.*]] = vector.bitcast %[[varg1]] : vector<8xi16> to vector<4xi32>
-! CHECK-FIR: %[[bc2:.*]] = vector.bitcast %[[varg2]] : vector<8xi16> to vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vsl(%[[bc1]], %[[bc2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcres:.*]] = vector.bitcast %[[vres]] : vector<4xi32> to vector<8xi16>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcres]] : (vector<8xi16>) -> !fir.vector<8:ui16>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[varg1:.*]] = llvm.bitcast %[[arg1]] : vector<8xi16> to vector<4xi32>
-! CHECK-LLVMIR: %[[varg2:.*]] = llvm.bitcast %[[arg2]] : vector<8xi16> to vector<4xi32>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vsl(%[[varg1]], %[[varg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[res]] : vector<4xi32> to vector<8xi16>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[varg1:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32>
-! CHECK: %[[varg2:.*]] = bitcast <8 x i16> %[[arg2]] to <4 x i32>
-! CHECK: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsl(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
-! CHECK: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <8 x i16>
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[varg1:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32>
+! LLVMIR: %[[varg2:.*]] = bitcast <8 x i16> %[[arg2]] to <4 x i32>
+! LLVMIR: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsl(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
+! LLVMIR: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <8 x i16>
 end subroutine vec_sll_u2u2
 
 ! CHECK-LABEL: vec_sll_u4u2
@@ -650,25 +295,10 @@ subroutine vec_sll_u4u2(arg1, arg2)
   vector(unsigned(2)) :: arg2
   r = vec_sll(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[bc2:.*]] = vector.bitcast %[[varg2]] : vector<8xi16> to vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vsl(%[[varg1]], %[[bc2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcres:.*]] = vector.bitcast %[[vres]] : vector<4xi32> to vector<4xi32>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcres]] : (vector<4xi32>) -> !fir.vector<4:ui32>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[varg2:.*]] = llvm.bitcast %[[arg2]] : vector<8xi16> to vector<4xi32>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.call @llvm.ppc.altivec.vsl(%[[arg1]], %[[varg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[varg2:.*]] = bitcast <8 x i16> %[[arg2]] to <4 x i32>
-! CHECK: %{{[0-9]+}} = call <4 x i32> @llvm.ppc.altivec.vsl(<4 x i32> %[[arg1]], <4 x i32> %[[varg2]])
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[varg2:.*]] = bitcast <8 x i16> %[[arg2]] to <4 x i32>
+! LLVMIR: %{{[0-9]+}} = call <4 x i32> @llvm.ppc.altivec.vsl(<4 x i32> %[[arg1]], <4 x i32> %[[varg2]])
 end subroutine vec_sll_u4u2
 
 ! CHECK-LABEL: vec_sll_u1u4
@@ -677,27 +307,11 @@ subroutine vec_sll_u1u4(arg1, arg2)
   vector(unsigned(4)) :: arg2
   r = vec_sll(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[bc1:.*]] = vector.bitcast %[[varg1]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vsl(%[[bc1]], %[[varg2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcres:.*]] = vector.bitcast %[[vres]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcres]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[varg1:.*]] = llvm.bitcast %[[arg1]] : vector<16xi8> to vector<4xi32>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vsl(%[[varg1]], %[[arg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-! CHECK-LLVMIR:    %{{[0-9]+}} = llvm.bitcast %[[res]] : vector<4xi32> to vector<16xi8>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[varg1:.*]] = bitcast <16 x i8> %[[arg1]] to <4 x i32>
-! CHECK: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsl(<4 x i32> %[[varg1]], <4 x i32> %[[arg2]])
-! CHECK: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <16 x i8>
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[varg1:.*]] = bitcast <16 x i8> %[[arg1]] to <4 x i32>
+! LLVMIR: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsl(<4 x i32> %[[varg1]], <4 x i32> %[[arg2]])
+! LLVMIR: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <16 x i8>
 end subroutine vec_sll_u1u4
 
 ! CHECK-LABEL: vec_sll_u2u4
@@ -706,27 +320,11 @@ subroutine vec_sll_u2u4(arg1, arg2)
   vector(unsigned(4)) :: arg2
   r = vec_sll(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[bc1:.*]] = vector.bitcast %[[varg1]] : vector<8xi16> to vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vsl(%[[bc1]], %[[varg2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcres:.*]] = vector.bitcast %[[vres]] : vector<4xi32> to vector<8xi16>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcres]] : (vector<8xi16>) -> !fir.vector<8:ui16>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[varg1:.*]] = llvm.bitcast %[[arg1]] : vector<8xi16> to vector<4xi32>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vsl(%[[varg1]], %[[arg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[res]] : vector<4xi32> to vector<8xi16>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[varg1:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32>
-! CHECK: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsl(<4 x i32> %[[varg1]], <4 x i32> %[[arg2]])
-! CHECK: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <8 x i16>
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[varg1:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32>
+! LLVMIR: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsl(<4 x i32> %[[varg1]], <4 x i32> %[[arg2]])
+! LLVMIR: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <8 x i16>
 end subroutine vec_sll_u2u4
 
 ! CHECK-LABEL: vec_sll_u4u4
@@ -735,22 +333,9 @@ subroutine vec_sll_u4u4(arg1, arg2)
   vector(unsigned(4)) :: arg2
   r = vec_sll(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vsl(%[[varg1]], %[[varg2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcres:.*]] = vector.bitcast %[[vres]] : vector<4xi32> to vector<4xi32>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcres]] : (vector<4xi32>) -> !fir.vector<4:ui32>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.call @llvm.ppc.altivec.vsl(%[[arg1]], %[[arg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %{{[0-9]+}} = call <4 x i32> @llvm.ppc.altivec.vsl(<4 x i32> %[[arg1]], <4 x i32> %[[arg2]])
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %{{[0-9]+}} = call <4 x i32> @llvm.ppc.altivec.vsl(<4 x i32> %[[arg1]], <4 x i32> %[[arg2]])
 end subroutine vec_sll_u4u4
 
 !----------------------
@@ -763,30 +348,12 @@ subroutine vec_slo_i1u1(arg1, arg2)
   vector(unsigned(1)) :: arg2
   r = vec_slo(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[bc1:.*]] = vector.bitcast %[[varg1]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[bc2:.*]] = vector.bitcast %[[varg2]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vslo(%[[bc1]], %[[bc2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcres:.*]] = vector.bitcast %[[vres]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcres]] : (vector<16xi8>) -> !fir.vector<16:i8>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load {{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load {{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[varg1:.*]] = llvm.bitcast %[[arg1]] : vector<16xi8> to vector<4xi32>
-! CHECK-LLVMIR: %[[varg2:.*]] = llvm.bitcast %[[arg2]] : vector<16xi8> to vector<4xi32>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vslo(%[[varg1]], %[[varg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[res]] : vector<4xi32> to vector<16xi8>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[varg1:.*]] = bitcast <16 x i8> %[[arg1]] to <4 x i32>
-! CHECK: %[[varg2:.*]] = bitcast <16 x i8> %[[arg2]] to <4 x i32>
-! CHECK: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vslo(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
-! CHECK: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <16 x i8>
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[varg1:.*]] = bitcast <16 x i8> %[[arg1]] to <4 x i32>
+! LLVMIR: %[[varg2:.*]] = bitcast <16 x i8> %[[arg2]] to <4 x i32>
+! LLVMIR: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vslo(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
+! LLVMIR: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <16 x i8>
 end subroutine vec_slo_i1u1
 
 ! CHECK-LABEL: vec_slo_i2u1
@@ -795,30 +362,12 @@ subroutine vec_slo_i2u1(arg1, arg2)
   vector(unsigned(1)) :: arg2
   r = vec_slo(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[bc1:.*]] = vector.bitcast %[[varg1]] : vector<8xi16> to vector<4xi32>
-! CHECK-FIR: %[[bc2:.*]] = vector.bitcast %[[varg2]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vslo(%[[bc1]], %[[bc2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcres:.*]] = vector.bitcast %[[vres]] : vector<4xi32> to vector<8xi16>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcres]] : (vector<8xi16>) -> !fir.vector<8:i16>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load {{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load {{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[varg1:.*]] = llvm.bitcast %[[arg1]] : vector<8xi16> to vector<4xi32>
-! CHECK-LLVMIR: %[[varg2:.*]] = llvm.bitcast %[[arg2]] : vector<16xi8> to vector<4xi32>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vslo(%[[varg1]], %[[varg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[res]] : vector<4xi32> to vector<8xi16>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[varg1:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32>
-! CHECK: %[[varg2:.*]] = bitcast <16 x i8> %[[arg2]] to <4 x i32>
-! CHECK: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vslo(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
-! CHECK: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <8 x i16>
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[varg1:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32>
+! LLVMIR: %[[varg2:.*]] = bitcast <16 x i8> %[[arg2]] to <4 x i32>
+! LLVMIR: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vslo(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
+! LLVMIR: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <8 x i16>
 end subroutine vec_slo_i2u1
 
 ! CHECK-LABEL: vec_slo_i4u1
@@ -827,22 +376,10 @@ subroutine vec_slo_i4u1(arg1, arg2)
   vector(unsigned(1)) :: arg2
   r = vec_slo(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[bc2:.*]] = vector.bitcast %[[varg2]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vslo(%[[varg1]], %[[bc2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load {{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load {{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[varg2:.*]] = llvm.bitcast %[[arg2]] : vector<16xi8> to vector<4xi32>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.call @llvm.ppc.altivec.vslo(%[[arg1]], %[[varg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[varg2:.*]] = bitcast <16 x i8> %[[arg2]] to <4 x i32>
-! CHECK: %{{[0-9]+}} = call <4 x i32> @llvm.ppc.altivec.vslo(<4 x i32> %[[arg1]], <4 x i32> %[[varg2]])
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[varg2:.*]] = bitcast <16 x i8> %[[arg2]] to <4 x i32>
+! LLVMIR: %{{[0-9]+}} = call <4 x i32> @llvm.ppc.altivec.vslo(<4 x i32> %[[arg1]], <4 x i32> %[[varg2]])
 end subroutine vec_slo_i4u1
 
 ! CHECK-LABEL: vec_slo_u1u1
@@ -851,30 +388,12 @@ subroutine vec_slo_u1u1(arg1, arg2)
   vector(unsigned(1)) :: arg2
   r = vec_slo(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[bc1:.*]] = vector.bitcast %[[varg1]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[bc2:.*]] = vector.bitcast %[[varg2]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vslo(%[[bc1]], %[[bc2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcres:.*]] = vector.bitcast %[[vres]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcres]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load {{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load {{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[varg1:.*]] = llvm.bitcast %[[arg1]] : vector<16xi8> to vector<4xi32>
-! CHECK-LLVMIR: %[[varg2:.*]] = llvm.bitcast %[[arg2]] : vector<16xi8> to vector<4xi32>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vslo(%[[varg1]], %[[varg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[res]] : vector<4xi32> to vector<16xi8>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[varg1:.*]] = bitcast <16 x i8> %[[arg1]] to <4 x i32>
-! CHECK: %[[varg2:.*]] = bitcast <16 x i8> %[[arg2]] to <4 x i32>
-! CHECK: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vslo(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
-! CHECK: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <16 x i8>
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[varg1:.*]] = bitcast <16 x i8> %[[arg1]] to <4 x i32>
+! LLVMIR: %[[varg2:.*]] = bitcast <16 x i8> %[[arg2]] to <4 x i32>
+! LLVMIR: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vslo(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
+! LLVMIR: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <16 x i8>
 end subroutine vec_slo_u1u1
 
 ! CHECK-LABEL: vec_slo_u2u1
@@ -883,30 +402,12 @@ subroutine vec_slo_u2u1(arg1, arg2)
   vector(unsigned(1)) :: arg2
   r = vec_slo(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[bc1:.*]] = vector.bitcast %[[varg1]] : vector<8xi16> to vector<4xi32>
-! CHECK-FIR: %[[bc2:.*]] = vector.bitcast %[[varg2]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vslo(%[[bc1]], %[[bc2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcres:.*]] = vector.bitcast %[[vres]] : vector<4xi32> to vector<8xi16>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcres]] : (vector<8xi16>) -> !fir.vector<8:ui16>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load {{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load {{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[varg1:.*]] = llvm.bitcast %[[arg1]] : vector<8xi16> to vector<4xi32>
-! CHECK-LLVMIR: %[[varg2:.*]] = llvm.bitcast %[[arg2]] : vector<16xi8> to vector<4xi32>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vslo(%[[varg1]], %[[varg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[res]] : vector<4xi32> to vector<8xi16>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[varg1:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32>
-! CHECK: %[[varg2:.*]] = bitcast <16 x i8> %[[arg2]] to <4 x i32>
-! CHECK: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vslo(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
-! CHECK: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <8 x i16>
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[varg1:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32>
+! LLVMIR: %[[varg2:.*]] = bitcast <16 x i8> %[[arg2]] to <4 x i32>
+! LLVMIR: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vslo(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
+! LLVMIR: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <8 x i16>
 end subroutine vec_slo_u2u1
 
 ! CHECK-LABEL: vec_slo_u4u1
@@ -915,25 +416,10 @@ subroutine vec_slo_u4u1(arg1, arg2)
   vector(unsigned(1)) :: arg2
   r = vec_slo(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[bc2:.*]] = vector.bitcast %[[varg2]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vslo(%[[varg1]], %[[bc2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcres:.*]] = vector.bitcast %[[vres]] : vector<4xi32> to vector<4xi32>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcres]] : (vector<4xi32>) -> !fir.vector<4:ui32>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load {{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load {{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[varg2:.*]] = llvm.bitcast %[[arg2]] : vector<16xi8> to vector<4xi32>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.call @llvm.ppc.altivec.vslo(%[[arg1]], %[[varg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[varg2:.*]] = bitcast <16 x i8> %[[arg2]] to <4 x i32>
-! CHECK: %{{[0-9]+}} = call <4 x i32> @llvm.ppc.altivec.vslo(<4 x i32> %[[arg1]], <4 x i32> %[[varg2]])
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[varg2:.*]] = bitcast <16 x i8> %[[arg2]] to <4 x i32>
+! LLVMIR: %{{[0-9]+}} = call <4 x i32> @llvm.ppc.altivec.vslo(<4 x i32> %[[arg1]], <4 x i32> %[[varg2]])
 end subroutine vec_slo_u4u1
 
 ! CHECK-LABEL: vec_slo_r4u1
@@ -942,30 +428,12 @@ subroutine vec_slo_r4u1(arg1, arg2)
   vector(unsigned(1)) :: arg2
   r = vec_slo(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[bc1:.*]] = vector.bitcast %[[varg1]] : vector<4xf32> to vector<4xi32>
-! CHECK-FIR: %[[bc2:.*]] = vector.bitcast %[[varg2]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vslo(%[[bc1]], %[[bc2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcres:.*]] = vector.bitcast %[[vres]] : vector<4xi32> to vector<4xf32>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcres]] : (vector<4xf32>) -> !fir.vector<4:f32>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load {{.*}} : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load {{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[varg1:.*]] = llvm.bitcast %[[arg1]] : vector<4xf32> to vector<4xi32>
-! CHECK-LLVMIR: %[[varg2:.*]] = llvm.bitcast %[[arg2]] : vector<16xi8> to vector<4xi32>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vslo(%[[varg1]], %[[varg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[res]] : vector<4xi32> to vector<4xf32>
-
-! CHECK: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[varg1:.*]] = bitcast <4 x float> %[[arg1]] to <4 x i32>
-! CHECK: %[[varg2:.*]] = bitcast <16 x i8> %[[arg2]] to <4 x i32>
-! CHECK: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vslo(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
-! CHECK: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <4 x float>
+! LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[varg1:.*]] = bitcast <4 x float> %[[arg1]] to <4 x i32>
+! LLVMIR: %[[varg2:.*]] = bitcast <16 x i8> %[[arg2]] to <4 x i32>
+! LLVMIR: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vslo(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
+! LLVMIR: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <4 x float>
 end subroutine vec_slo_r4u1
 
 ! CHECK-LABEL: vec_slo_i1u2
@@ -974,30 +442,12 @@ subroutine vec_slo_i1u2(arg1, arg2)
   vector(unsigned(2)) :: arg2
   r = vec_slo(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[bc1:.*]] = vector.bitcast %[[varg1]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[bc2:.*]] = vector.bitcast %[[varg2]] : vector<8xi16> to vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vslo(%[[bc1]], %[[bc2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcres:.*]] = vector.bitcast %[[vres]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcres]] : (vector<16xi8>) -> !fir.vector<16:i8>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load {{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load {{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[varg1:.*]] = llvm.bitcast %[[arg1]] : vector<16xi8> to vector<4xi32>
-! CHECK-LLVMIR: %[[varg2:.*]] = llvm.bitcast %[[arg2]] : vector<8xi16> to vector<4xi32>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vslo(%[[varg1]], %[[varg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[res]] : vector<4xi32> to vector<16xi8>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[varg1:.*]] = bitcast <16 x i8> %[[arg1]] to <4 x i32>
-! CHECK: %[[varg2:.*]] = bitcast <8 x i16> %[[arg2]] to <4 x i32>
-! CHECK: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vslo(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
-! CHECK: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <16 x i8>
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[varg1:.*]] = bitcast <16 x i8> %[[arg1]] to <4 x i32>
+! LLVMIR: %[[varg2:.*]] = bitcast <8 x i16> %[[arg2]] to <4 x i32>
+! LLVMIR: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vslo(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
+! LLVMIR: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <16 x i8>
 end subroutine vec_slo_i1u2
 
 ! CHECK-LABEL: vec_slo_i2u2
@@ -1006,30 +456,12 @@ subroutine vec_slo_i2u2(arg1, arg2)
   vector(unsigned(2)) :: arg2
   r = vec_slo(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[bc1:.*]] = vector.bitcast %[[varg1]] : vector<8xi16> to vector<4xi32>
-! CHECK-FIR: %[[bc2:.*]] = vector.bitcast %[[varg2]] : vector<8xi16> to vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vslo(%[[bc1]], %[[bc2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcres:.*]] = vector.bitcast %[[vres]] : vector<4xi32> to vector<8xi16>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcres]] : (vector<8xi16>) -> !fir.vector<8:i16>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load {{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load {{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[varg1:.*]] = llvm.bitcast %[[arg1]] : vector<8xi16> to vector<4xi32>
-! CHECK-LLVMIR: %[[varg2:.*]] = llvm.bitcast %[[arg2]] : vector<8xi16> to vector<4xi32>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vslo(%[[varg1]], %[[varg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[res]] : vector<4xi32> to vector<8xi16>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[varg1:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32>
-! CHECK: %[[varg2:.*]] = bitcast <8 x i16> %[[arg2]] to <4 x i32>
-! CHECK: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vslo(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
-! CHECK: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <8 x i16>
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[varg1:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32>
+! LLVMIR: %[[varg2:.*]] = bitcast <8 x i16> %[[arg2]] to <4 x i32>
+! LLVMIR: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vslo(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
+! LLVMIR: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <8 x i16>
 end subroutine vec_slo_i2u2
 
 ! CHECK-LABEL: vec_slo_i4u2
@@ -1038,22 +470,10 @@ subroutine vec_slo_i4u2(arg1, arg2)
   vector(unsigned(2)) :: arg2
   r = vec_slo(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[bc2:.*]] = vector.bitcast %[[varg2]] : vector<8xi16> to vector<4xi32>
-! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vslo(%[[varg1]], %[[bc2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load {{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load {{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[varg2:.*]] = llvm.bitcast %[[arg2]] : vector<8xi16> to vector<4xi32>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.call @llvm.ppc.altivec.vslo(%[[arg1]], %[[varg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[varg2:.*]] = bitcast <8 x i16> %[[arg2]] to <4 x i32>
-! CHECK: %{{[0-9]+}} = call <4 x i32> @llvm.ppc.altivec.vslo(<4 x i32> %[[arg1]], <4 x i32> %[[varg2]])
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[varg2:.*]] = bitcast <8 x i16> %[[arg2]] to <4 x i32>
+! LLVMIR: %{{[0-9]+}} = call <4 x i32> @llvm.ppc.altivec.vslo(<4 x i32> %[[arg1]], <4 x i32> %[[varg2]])
 end subroutine vec_slo_i4u2
 
 ! CHECK-LABEL: vec_slo_u1u2
@@ -1062,30 +482,12 @@ subroutine vec_slo_u1u2(arg1, arg2)
   vector(unsigned(2)) :: arg2
   r = vec_slo(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[bc1:.*]] = vector.bitcast %[[varg1]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[bc2:.*]] = vector.bitcast %[[varg2]] : vector<8xi16> to vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vslo(%[[bc1]], %[[bc2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcres:.*]] = vector.bitcast %[[vres]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcres]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load {{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load {{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[varg1:.*]] = llvm.bitcast %[[arg1]] : vector<16xi8> to vector<4xi32>
-! CHECK-LLVMIR: %[[varg2:.*]] = llvm.bitcast %[[arg2]] : vector<8xi16> to vector<4xi32>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vslo(%[[varg1]], %[[varg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[res]] : vector<4xi32> to vector<16xi8>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[varg1:.*]] = bitcast <16 x i8> %[[arg1]] to <4 x i32>
-! CHECK: %[[varg2:.*]] = bitcast <8 x i16> %[[arg2]] to <4 x i32>
-! CHECK: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vslo(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
-! CHECK: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <16 x i8>
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[varg1:.*]] = bitcast <16 x i8> %[[arg1]] to <4 x i32>
+! LLVMIR: %[[varg2:.*]] = bitcast <8 x i16> %[[arg2]] to <4 x i32>
+! LLVMIR: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vslo(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
+! LLVMIR: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <16 x i8>
 end subroutine vec_slo_u1u2
 
 ! CHECK-LABEL: vec_slo_u2u2
@@ -1094,30 +496,12 @@ subroutine vec_slo_u2u2(arg1, arg2)
   vector(unsigned(2)) :: arg2
   r = vec_slo(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[bc1:.*]] = vector.bitcast %[[varg1]] : vector<8xi16> to vector<4xi32>
-! CHECK-FIR: %[[bc2:.*]] = vector.bitcast %[[varg2]] : vector<8xi16> to vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vslo(%[[bc1]], %[[bc2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcres:.*]] = vector.bitcast %[[vres]] : vector<4xi32> to vector<8xi16>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcres]] : (vector<8xi16>) -> !fir.vector<8:ui16>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load {{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load {{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[varg1:.*]] = llvm.bitcast %[[arg1]] : vector<8xi16> to vector<4xi32>
-! CHECK-LLVMIR: %[[varg2:.*]] = llvm.bitcast %[[arg2]] : vector<8xi16> to vector<4xi32>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vslo(%[[varg1]], %[[varg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[res]] : vector<4xi32> to vector<8xi16>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[varg1:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32>
-! CHECK: %[[varg2:.*]] = bitcast <8 x i16> %[[arg2]] to <4 x i32>
-! CHECK: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vslo(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
-! CHECK: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <8 x i16>
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[varg1:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32>
+! LLVMIR: %[[varg2:.*]] = bitcast <8 x i16> %[[arg2]] to <4 x i32>
+! LLVMIR: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vslo(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
+! LLVMIR: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <8 x i16>
 
 end subroutine vec_slo_u2u2
 
@@ -1127,25 +511,10 @@ subroutine vec_slo_u4u2(arg1, arg2)
   vector(unsigned(2)) :: arg2
   r = vec_slo(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[bc2:.*]] = vector.bitcast %[[varg2]] : vector<8xi16> to vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vslo(%[[varg1]], %[[bc2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcres:.*]] = vector.bitcast %[[vres]] : vector<4xi32> to vector<4xi32>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcres]] : (vector<4xi32>) -> !fir.vector<4:ui32>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load {{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load {{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[varg2:.*]] = llvm.bitcast %[[arg2]] : vector<8xi16> to vector<4xi32>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.call @llvm.ppc.altivec.vslo(%[[arg1]], %[[varg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[varg2:.*]] = bitcast <8 x i16> %[[arg2]] to <4 x i32>
-! CHECK: %{{[0-9]+}} = call <4 x i32> @llvm.ppc.altivec.vslo(<4 x i32> %[[arg1]], <4 x i32> %[[varg2]])
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[varg2:.*]] = bitcast <8 x i16> %[[arg2]] to <4 x i32>
+! LLVMIR: %{{[0-9]+}} = call <4 x i32> @llvm.ppc.altivec.vslo(<4 x i32> %[[arg1]], <4 x i32> %[[varg2]])
 end subroutine vec_slo_u4u2
 
 ! CHECK-LABEL: vec_slo_r4u2
@@ -1154,30 +523,12 @@ subroutine vec_slo_r4u2(arg1, arg2)
   vector(unsigned(2)) :: arg2
   r = vec_slo(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[bc1:.*]] = vector.bitcast %[[varg1]] : vector<4xf32> to vector<4xi32>
-! CHECK-FIR: %[[bc2:.*]] = vector.bitcast %[[varg2]] : vector<8xi16> to vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vslo(%[[bc1]], %[[bc2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcres:.*]] = vector.bitcast %[[vres]] : vector<4xi32> to vector<4xf32>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcres]] : (vector<4xf32>) -> !fir.vector<4:f32>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load {{.*}} : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load {{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[varg1:.*]] = llvm.bitcast %[[arg1]] : vector<4xf32> to vector<4xi32>
-! CHECK-LLVMIR: %[[varg2:.*]] = llvm.bitcast %[[arg2]] : vector<8xi16> to vector<4xi32>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vslo(%[[varg1]], %[[varg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[res]] : vector<4xi32> to vector<4xf32>
-
-! CHECK: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[varg1:.*]] = bitcast <4 x float> %[[arg1]] to <4 x i32>
-! CHECK: %[[varg2:.*]] = bitcast <8 x i16> %[[arg2]] to <4 x i32>
-! CHECK: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vslo(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
-! CHECK: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <4 x float>
+! LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[varg1:.*]] = bitcast <4 x float> %[[arg1]] to <4 x i32>
+! LLVMIR: %[[varg2:.*]] = bitcast <8 x i16> %[[arg2]] to <4 x i32>
+! LLVMIR: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vslo(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
+! LLVMIR: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <4 x float>
 end subroutine vec_slo_r4u2
 
 !----------------------
@@ -1189,26 +540,10 @@ subroutine vec_sr_i1(arg1, arg2)
   vector(unsigned(1)) :: arg2
   r = vec_sr(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %c8_i8 = arith.constant 8 : i8
-! CHECK-FIR: %[[cv:.*]] = vector.broadcast %c8_i8 : i8 to vector<16xi8>
-! CHECK-FIR: %[[msk:.*]] = arith.remui %[[varg2]], %[[cv]] : vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = arith.shrui %[[varg1]], %[[msk]] : vector<16xi8>
-! CHECK-FIR: %{{[0-9]}} = fir.convert %[[r]] : (vector<16xi8>) -> !fir.vector<16:i8>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[cv:.*]] = llvm.mlir.constant(dense<8> : vector<16xi8>) : vector<16xi8>
-! CHECK-LLVMIR: %[[msk:.*]] = llvm.urem %[[arg2]], %[[cv]]  : vector<16xi8>
-! CHECK-LLVMIR: %{{[0-9]}} = llvm.lshr %[[arg1]], %[[msk]]  : vector<16xi8>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[msk:.*]] = urem <16 x i8> %[[arg2]], <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
-! CHECK: %7 = lshr <16 x i8> %[[arg1]], %[[msk]]
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[msk:.*]] = urem <16 x i8> %[[arg2]], <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
+! LLVMIR: %7 = lshr <16 x i8> %[[arg1]], %[[msk]]
 end subroutine vec_sr_i1
 
 ! CHECK-LABEL: vec_sr_i2
@@ -1217,26 +552,10 @@ subroutine vec_sr_i2(arg1, arg2)
   vector(unsigned(2)) :: arg2
   r = vec_sr(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %c16_i16 = arith.constant 16 : i16
-! CHECK-FIR: %[[cv:.*]] = vector.broadcast %c16_i16 : i16 to vector<8xi16>
-! CHECK-FIR: %[[msk:.*]] = arith.remui %[[varg2]], %[[cv]] : vector<8xi16>
-! CHECK-FIR: %[[r:.*]] = arith.shrui %[[varg1]], %[[msk]] : vector<8xi16>
-! CHECK-FIR: %{{[0-9]}} = fir.convert %[[r]] : (vector<8xi16>) -> !fir.vector<8:i16>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[cv:.*]] = llvm.mlir.constant(dense<16> : vector<8xi16>) : vector<8xi16>
-! CHECK-LLVMIR: %[[msk:.*]] = llvm.urem %[[arg2]], %[[cv]]  : vector<8xi16>
-! CHECK-LLVMIR: %{{[0-9]}} = llvm.lshr %[[arg1]], %[[msk]]  : vector<8xi16>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[msk:.*]] = urem <8 x i16> %[[arg2]], <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-! CHECK: %7 = lshr <8 x i16> %[[arg1]], %[[msk]]
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[msk:.*]] = urem <8 x i16> %[[arg2]], <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+! LLVMIR: %7 = lshr <8 x i16> %[[arg1]], %[[msk]]
 end subroutine vec_sr_i2
 
 ! CHECK-LABEL: vec_sr_i4
@@ -1245,26 +564,10 @@ subroutine vec_sr_i4(arg1, arg2)
   vector(unsigned(4)) :: arg2
   r = vec_sr(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %c32_i32 = arith.constant 32 : i32
-! CHECK-FIR: %[[cv:.*]] = vector.broadcast %c32_i32 : i32 to vector<4xi32>
-! CHECK-FIR: %[[msk:.*]] = arith.remui %[[varg2]], %[[cv]] : vector<4xi32>
-! CHECK-FIR: %[[r:.*]] = arith.shrui %[[varg1]], %[[msk]] : vector<4xi32>
-! CHECK-FIR: %{{[0-9]}} = fir.convert %[[r]] : (vector<4xi32>) -> !fir.vector<4:i32>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[cv:.*]] = llvm.mlir.constant(dense<32> : vector<4xi32>) : vector<4xi32>
-! CHECK-LLVMIR: %[[msk:.*]] = llvm.urem %[[arg2]], %[[cv]]  : vector<4xi32>
-! CHECK-LLVMIR: %{{[0-9]}} = llvm.lshr %[[arg1]], %[[msk]]  : vector<4xi32>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[msk:.*]] = urem <4 x i32> %[[arg2]], <i32 32, i32 32, i32 32, i32 32>
-! CHECK: %7 = lshr <4 x i32> %[[arg1]], %[[msk]]
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[msk:.*]] = urem <4 x i32> %[[arg2]], <i32 32, i32 32, i32 32, i32 32>
+! LLVMIR: %7 = lshr <4 x i32> %[[arg1]], %[[msk]]
 end subroutine vec_sr_i4
 
 ! CHECK-LABEL: vec_sr_i8
@@ -1273,26 +576,10 @@ subroutine vec_sr_i8(arg1, arg2)
   vector(unsigned(8)) :: arg2
   r = vec_sr(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %c64_i64 = arith.constant 64 : i64
-! CHECK-FIR: %[[cv:.*]] = vector.broadcast %c64_i64 : i64 to vector<2xi64>
-! CHECK-FIR: %[[msk:.*]] = arith.remui %[[varg2]], %[[cv]] : vector<2xi64>
-! CHECK-FIR: %[[r:.*]] = arith.shrui %[[varg1]], %[[msk]] : vector<2xi64>
-! CHECK-FIR: %{{[0-9]}} = fir.convert %[[r]] : (vector<2xi64>) -> !fir.vector<2:i64>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[cv:.*]] = llvm.mlir.constant(dense<64> : vector<2xi64>) : vector<2xi64>
-! CHECK-LLVMIR: %[[msk:.*]] = llvm.urem %[[arg2]], %[[cv]]  : vector<2xi64>
-! CHECK-LLVMIR: %{{[0-9]}} = llvm.lshr %[[arg1]], %[[msk]]  : vector<2xi64>
-
-! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[msk:.*]] = urem <2 x i64> %[[arg2]], <i64 64, i64 64>
-! CHECK: %7 = lshr <2 x i64> %[[arg1]], %[[msk]]
+! LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[msk:.*]] = urem <2 x i64> %[[arg2]], <i64 64, i64 64>
+! LLVMIR: %7 = lshr <2 x i64> %[[arg1]], %[[msk]]
 end subroutine vec_sr_i8
 
 ! CHECK-LABEL: vec_sr_u1
@@ -1301,26 +588,10 @@ subroutine vec_sr_u1(arg1, arg2)
   vector(unsigned(1)) :: arg2
   r = vec_sr(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %c8_i8 = arith.constant 8 : i8
-! CHECK-FIR: %[[cv:.*]] = vector.broadcast %c8_i8 : i8 to vector<16xi8>
-! CHECK-FIR: %[[msk:.*]] = arith.remui %[[varg2]], %[[cv]] : vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = arith.shrui %[[varg1]], %[[msk]] : vector<16xi8>
-! CHECK-FIR: %{{[0-9]}} = fir.convert %[[r]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[cv:.*]] = llvm.mlir.constant(dense<8> : vector<16xi8>) : vector<16xi8>
-! CHECK-LLVMIR: %[[msk:.*]] = llvm.urem %[[arg2]], %[[cv]]  : vector<16xi8>
-! CHECK-LLVMIR: %{{[0-9]}} = llvm.lshr %[[arg1]], %[[msk]]  : vector<16xi8>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[msk:.*]] = urem <16 x i8> %[[arg2]], <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
-! CHECK: %7 = lshr <16 x i8> %[[arg1]], %[[msk]]
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[msk:.*]] = urem <16 x i8> %[[arg2]], <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
+! LLVMIR: %7 = lshr <16 x i8> %[[arg1]], %[[msk]]
 end subroutine vec_sr_u1
 
 ! CHECK-LABEL: vec_sr_u2
@@ -1329,26 +600,10 @@ subroutine vec_sr_u2(arg1, arg2)
   vector(unsigned(2)) :: arg2
   r = vec_sr(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %c16_i16 = arith.constant 16 : i16
-! CHECK-FIR: %[[cv:.*]] = vector.broadcast %c16_i16 : i16 to vector<8xi16>
-! CHECK-FIR: %[[msk:.*]] = arith.remui %[[varg2]], %[[cv]] : vector<8xi16>
-! CHECK-FIR: %[[r:.*]] = arith.shrui %[[varg1]], %[[msk]] : vector<8xi16>
-! CHECK-FIR: %{{[0-9]}} = fir.convert %[[r]] : (vector<8xi16>) -> !fir.vector<8:ui16>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[cv:.*]] = llvm.mlir.constant(dense<16> : vector<8xi16>) : vector<8xi16>
-! CHECK-LLVMIR: %[[msk:.*]] = llvm.urem %[[arg2]], %[[cv]]  : vector<8xi16>
-! CHECK-LLVMIR: %{{[0-9]}} = llvm.lshr %[[arg1]], %[[msk]]  : vector<8xi16>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[msk:.*]] = urem <8 x i16> %[[arg2]], <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-! CHECK: %7 = lshr <8 x i16> %[[arg1]], %[[msk]]
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[msk:.*]] = urem <8 x i16> %[[arg2]], <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+! LLVMIR: %7 = lshr <8 x i16> %[[arg1]], %[[msk]]
 end subroutine vec_sr_u2
 
 ! CHECK-LABEL: vec_sr_u4
@@ -1357,26 +612,10 @@ subroutine vec_sr_u4(arg1, arg2)
   vector(unsigned(4)) :: arg2
   r = vec_sr(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %c32_i32 = arith.constant 32 : i32
-! CHECK-FIR: %[[cv:.*]] = vector.broadcast %c32_i32 : i32 to vector<4xi32>
-! CHECK-FIR: %[[msk:.*]] = arith.remui %[[varg2]], %[[cv]] : vector<4xi32>
-! CHECK-FIR: %[[r:.*]] = arith.shrui %[[varg1]], %[[msk]] : vector<4xi32>
-! CHECK-FIR: %{{[0-9]}} = fir.convert %[[r]] : (vector<4xi32>) -> !fir.vector<4:ui32>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[cv:.*]] = llvm.mlir.constant(dense<32> : vector<4xi32>) : vector<4xi32>
-! CHECK-LLVMIR: %[[msk:.*]] = llvm.urem %[[arg2]], %[[cv]]  : vector<4xi32>
-! CHECK-LLVMIR: %{{[0-9]}} = llvm.lshr %[[arg1]], %[[msk]]  : vector<4xi32>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[msk:.*]] = urem <4 x i32> %[[arg2]], <i32 32, i32 32, i32 32, i32 32>
-! CHECK: %7 = lshr <4 x i32> %[[arg1]], %[[msk]]
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[msk:.*]] = urem <4 x i32> %[[arg2]], <i32 32, i32 32, i32 32, i32 32>
+! LLVMIR: %7 = lshr <4 x i32> %[[arg1]], %[[msk]]
 end subroutine vec_sr_u4
 
 ! CHECK-LABEL: vec_sr_u8
@@ -1385,26 +624,10 @@ subroutine vec_sr_u8(arg1, arg2)
   vector(unsigned(8)) :: arg2
   r = vec_sr(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %c64_i64 = arith.constant 64 : i64
-! CHECK-FIR: %[[cv:.*]] = vector.broadcast %c64_i64 : i64 to vector<2xi64>
-! CHECK-FIR: %[[msk:.*]] = arith.remui %[[varg2]], %[[cv]] : vector<2xi64>
-! CHECK-FIR: %[[r:.*]] = arith.shrui %[[varg1]], %[[msk]] : vector<2xi64>
-! CHECK-FIR: %{{[0-9]}} = fir.convert %[[r]] : (vector<2xi64>) -> !fir.vector<2:ui64>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[cv:.*]] = llvm.mlir.constant(dense<64> : vector<2xi64>) : vector<2xi64>
-! CHECK-LLVMIR: %[[msk:.*]] = llvm.urem %[[arg2]], %[[cv]]  : vector<2xi64>
-! CHECK-LLVMIR: %{{[0-9]}} = llvm.lshr %[[arg1]], %[[msk]]  : vector<2xi64>
-
-! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[msk:.*]] = urem <2 x i64> %[[arg2]], <i64 64, i64 64>
-! CHECK: %7 = lshr <2 x i64> %[[arg1]], %[[msk]]
+! LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
+! LLVMIR: %[[msk:.*]] = urem <2 x i64> %[[arg2]], <i64 64, i64 64>
+! LLVMIR: %7 = lshr <2 x i64> %[[arg1]], %[[msk]]
 end subroutine vec_sr_u8
 
 !----------------------
@@ -1416,30 +639,12 @@ subroutine vec_srl_i1u1(arg1, arg2)
   vector(unsigned(1)) :: arg2
   r = vec_srl(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[bc1:.*]] = vector.bitcast %[[varg1]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[bc2:.*]] = vector.bitcast %[[varg2]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vsr(%[[bc1]], %[[bc2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcres:.*]] = vector.bitcast %[[vres]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcres]] : (vector<16xi8>) -> !fir.vector<16:i8>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[varg1:.*]] = llvm.bitcast %[[arg1]] : vector<16xi8> to vector<4xi32>
-! CHECK-LLVMIR: %[[varg2:.*]] = llvm.bitcast %[[arg2]] : vector<16xi8> to vector<4xi32>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vsr(%[[varg1]], %[[varg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[res]] : vector<4xi32> to vector<16xi8>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[varg1:.*]] = bitcast <16 x i8> %[[arg1]] to <4 x i32>
-! CHECK: %[[varg2:.*]] = bitcast <16 x i8> %[[arg2]] to <4 x i32>
-! CHECK: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsr(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
-! CHECK: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <16 x i8>
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[varg1:.*]] = bitcast <16 x i8> %[[arg1]] to <4 x i32>
+! LLVMIR: %[[varg2:.*]] = bitcast <16 x i8> %[[arg2]] to <4 x i32>
+! LLVMIR: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsr(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
+! LLVMIR: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <16 x i8>
 end subroutine vec_srl_i1u1
 
 ! CHECK-LABEL: vec_srl_i2u1
@@ -1448,30 +653,12 @@ subroutine vec_srl_i2u1(arg1, arg2)
   vector(unsigned(1)) :: arg2
   r = vec_srl(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[bc1:.*]] = vector.bitcast %[[varg1]] : vector<8xi16> to vector<4xi32>
-! CHECK-FIR: %[[bc2:.*]] = vector.bitcast %[[varg2]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vsr(%[[bc1]], %[[bc2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcres:.*]] = vector.bitcast %[[vres]] : vector<4xi32> to vector<8xi16>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcres]] : (vector<8xi16>) -> !fir.vector<8:i16>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[varg1:.*]] = llvm.bitcast %[[arg1]] : vector<8xi16> to vector<4xi32>
-! CHECK-LLVMIR: %[[varg2:.*]] = llvm.bitcast %[[arg2]] : vector<16xi8> to vector<4xi32>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vsr(%[[varg1]], %[[varg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[res]] : vector<4xi32> to vector<8xi16>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[varg1:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32>
-! CHECK: %[[varg2:.*]] = bitcast <16 x i8> %[[arg2]] to <4 x i32>
-! CHECK: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsr(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
-! CHECK: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <8 x i16>
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[varg1:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32>
+! LLVMIR: %[[varg2:.*]] = bitcast <16 x i8> %[[arg2]] to <4 x i32>
+! LLVMIR: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsr(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
+! LLVMIR: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <8 x i16>
 end subroutine vec_srl_i2u1
 
 ! CHECK-LABEL: vec_srl_i4u1
@@ -1480,22 +667,10 @@ subroutine vec_srl_i4u1(arg1, arg2)
   vector(unsigned(1)) :: arg2
   r = vec_srl(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[bc2:.*]] = vector.bitcast %[[varg2]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vsr(%[[varg1]], %[[bc2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[varg2:.*]] = llvm.bitcast %[[arg2]] : vector<16xi8> to vector<4xi32>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vsr(%[[arg1]], %[[varg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[varg2:.*]] = bitcast <16 x i8> %[[arg2]] to <4 x i32>
-! CHECK: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsr(<4 x i32> %[[arg1]], <4 x i32> %[[varg2]])
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[varg2:.*]] = bitcast <16 x i8> %[[arg2]] to <4 x i32>
+! LLVMIR: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsr(<4 x i32> %[[arg1]], <4 x i32> %[[varg2]])
 end subroutine vec_srl_i4u1
 
 ! CHECK-LABEL: vec_srl_i1u2
@@ -1504,30 +679,12 @@ subroutine vec_srl_i1u2(arg1, arg2)
   vector(unsigned(2)) :: arg2
   r = vec_srl(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[bc1:.*]] = vector.bitcast %[[varg1]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[bc2:.*]] = vector.bitcast %[[varg2]] : vector<8xi16> to vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vsr(%[[bc1]], %[[bc2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcres:.*]] = vector.bitcast %[[vres]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcres]] : (vector<16xi8>) -> !fir.vector<16:i8>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[varg1:.*]] = llvm.bitcast %[[arg1]] : vector<16xi8> to vector<4xi32>
-! CHECK-LLVMIR: %[[varg2:.*]] = llvm.bitcast %[[arg2]] : vector<8xi16> to vector<4xi32>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vsr(%[[varg1]], %[[varg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-! CHECK-LLVMIR:    %{{[0-9]+}} = llvm.bitcast %[[res]] : vector<4xi32> to vector<16xi8>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[varg1:.*]] = bitcast <16 x i8> %[[arg1]] to <4 x i32>
-! CHECK: %[[varg2:.*]] = bitcast <8 x i16> %[[arg2]] to <4 x i32>
-! CHECK: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsr(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
-! CHECK: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <16 x i8>
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[varg1:.*]] = bitcast <16 x i8> %[[arg1]] to <4 x i32>
+! LLVMIR: %[[varg2:.*]] = bitcast <8 x i16> %[[arg2]] to <4 x i32>
+! LLVMIR: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsr(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
+! LLVMIR: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <16 x i8>
 end subroutine vec_srl_i1u2
 
 ! CHECK-LABEL: vec_srl_i2u2
@@ -1536,30 +693,12 @@ subroutine vec_srl_i2u2(arg1, arg2)
   vector(unsigned(2)) :: arg2
   r = vec_srl(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[bc1:.*]] = vector.bitcast %[[varg1]] : vector<8xi16> to vector<4xi32>
-! CHECK-FIR: %[[bc2:.*]] = vector.bitcast %[[varg2]] : vector<8xi16> to vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vsr(%[[bc1]], %[[bc2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcres:.*]] = vector.bitcast %[[vres]] : vector<4xi32> to vector<8xi16>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcres]] : (vector<8xi16>) -> !fir.vector<8:i16>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[varg1:.*]] = llvm.bitcast %[[arg1]] : vector<8xi16> to vector<4xi32>
-! CHECK-LLVMIR: %[[varg2:.*]] = llvm.bitcast %[[arg2]] : vector<8xi16> to vector<4xi32>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vsr(%[[varg1]], %[[varg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[res]] : vector<4xi32> to vector<8xi16>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[varg1:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32>
-! CHECK: %[[varg2:.*]] = bitcast <8 x i16> %[[arg2]] to <4 x i32>
-! CHECK: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsr(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
-! CHECK: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <8 x i16>
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[varg1:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32>
+! LLVMIR: %[[varg2:.*]] = bitcast <8 x i16> %[[arg2]] to <4 x i32>
+! LLVMIR: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsr(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
+! LLVMIR: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <8 x i16>
 end subroutine vec_srl_i2u2
 
 ! CHECK-LABEL: vec_srl_i4u2
@@ -1568,22 +707,10 @@ subroutine vec_srl_i4u2(arg1, arg2)
   vector(unsigned(2)) :: arg2
   r = vec_srl(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[bc2:.*]] = vector.bitcast %[[varg2]] : vector<8xi16> to vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vsr(%[[varg1]], %[[bc2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[varg2:.*]] = llvm.bitcast %[[arg2]] : vector<8xi16> to vector<4xi32>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vsr(%[[arg1]], %[[varg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[varg2:.*]] = bitcast <8 x i16> %[[arg2]] to <4 x i32>
-! CHECK: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsr(<4 x i32> %[[arg1]], <4 x i32> %[[varg2]])
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[varg2:.*]] = bitcast <8 x i16> %[[arg2]] to <4 x i32>
+! LLVMIR: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsr(<4 x i32> %[[arg1]], <4 x i32> %[[varg2]])
 end subroutine vec_srl_i4u2
 
 ! CHECK-LABEL: vec_srl_i1u4
@@ -1592,27 +719,11 @@ subroutine vec_srl_i1u4(arg1, arg2)
   vector(unsigned(4)) :: arg2
   r = vec_srl(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[bc1:.*]] = vector.bitcast %[[varg1]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vsr(%[[bc1]], %[[varg2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcres:.*]] = vector.bitcast %[[vres]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcres]] : (vector<16xi8>) -> !fir.vector<16:i8>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[varg1:.*]] = llvm.bitcast %[[arg1]] : vector<16xi8> to vector<4xi32>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vsr(%[[varg1]], %[[arg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-! CHECK-LLVMIR:    %{{[0-9]+}} = llvm.bitcast %[[res]] : vector<4xi32> to vector<16xi8>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[varg1:.*]] = bitcast <16 x i8> %[[arg1]] to <4 x i32>
-! CHECK: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsr(<4 x i32> %[[varg1]], <4 x i32> %[[arg2]])
-! CHECK: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <16 x i8>
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[varg1:.*]] = bitcast <16 x i8> %[[arg1]] to <4 x i32>
+! LLVMIR: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsr(<4 x i32> %[[varg1]], <4 x i32> %[[arg2]])
+! LLVMIR: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <16 x i8>
 end subroutine vec_srl_i1u4
 
 ! CHECK-LABEL: vec_srl_i2u4
@@ -1621,27 +732,11 @@ subroutine vec_srl_i2u4(arg1, arg2)
   vector(unsigned(4)) :: arg2
   r = vec_srl(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[bc1:.*]] = vector.bitcast %[[varg1]] : vector<8xi16> to vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vsr(%[[bc1]], %[[varg2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcres:.*]] = vector.bitcast %[[vres]] : vector<4xi32> to vector<8xi16>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcres]] : (vector<8xi16>) -> !fir.vector<8:i16>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[varg1:.*]] = llvm.bitcast %[[arg1]] : vector<8xi16> to vector<4xi32>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vsr(%[[varg1]], %[[arg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[res]] : vector<4xi32> to vector<8xi16>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[varg1:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32>
-! CHECK: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsr(<4 x i32> %[[varg1]], <4 x i32> %[[arg2]])
-! CHECK: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <8 x i16>
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[varg1:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32>
+! LLVMIR: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsr(<4 x i32> %[[varg1]], <4 x i32> %[[arg2]])
+! LLVMIR: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <8 x i16>
 end subroutine vec_srl_i2u4
 
 ! CHECK-LABEL: vec_srl_i4u4
@@ -1650,19 +745,9 @@ subroutine vec_srl_i4u4(arg1, arg2)
   vector(unsigned(4)) :: arg2
   r = vec_srl(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vsr(%[[varg1]], %[[varg2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vsr(%[[arg1]], %[[arg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsr(<4 x i32> %[[arg1]], <4 x i32> %[[arg2]])
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsr(<4 x i32> %[[arg1]], <4 x i32> %[[arg2]])
 end subroutine vec_srl_i4u4
 
 ! CHECK-LABEL: vec_srl_u1u1
@@ -1671,30 +756,12 @@ subroutine vec_srl_u1u1(arg1, arg2)
   vector(unsigned(1)) :: arg2
   r = vec_srl(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[bc1:.*]] = vector.bitcast %[[varg1]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[bc2:.*]] = vector.bitcast %[[varg2]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vsr(%[[bc1]], %[[bc2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcres:.*]] = vector.bitcast %[[vres]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcres]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[varg1:.*]] = llvm.bitcast %[[arg1]] : vector<16xi8> to vector<4xi32>
-! CHECK-LLVMIR: %[[varg2:.*]] = llvm.bitcast %[[arg2]] : vector<16xi8> to vector<4xi32>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vsr(%[[varg1]], %[[varg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-! CHECK-LLVMIR:    %{{[0-9]+}} = llvm.bitcast %[[res]] : vector<4xi32> to vector<16xi8>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[varg1:.*]] = bitcast <16 x i8> %[[arg1]] to <4 x i32>
-! CHECK: %[[varg2:.*]] = bitcast <16 x i8> %[[arg2]] to <4 x i32>
-! CHECK: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsr(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
-! CHECK: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <16 x i8>
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[varg1:.*]] = bitcast <16 x i8> %[[arg1]] to <4 x i32>
+! LLVMIR: %[[varg2:.*]] = bitcast <16 x i8> %[[arg2]] to <4 x i32>
+! LLVMIR: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsr(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
+! LLVMIR: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <16 x i8>
 end subroutine vec_srl_u1u1
 
 ! CHECK-LABEL: vec_srl_u2u1
@@ -1703,30 +770,12 @@ subroutine vec_srl_u2u1(arg1, arg2)
   vector(unsigned(1)) :: arg2
   r = vec_srl(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[bc1:.*]] = vector.bitcast %[[varg1]] : vector<8xi16> to vector<4xi32>
-! CHECK-FIR: %[[bc2:.*]] = vector.bitcast %[[varg2]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vsr(%[[bc1]], %[[bc2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcres:.*]] = vector.bitcast %[[vres]] : vector<4xi32> to vector<8xi16>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcres]] : (vector<8xi16>) -> !fir.vector<8:ui16>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[varg1:.*]] = llvm.bitcast %[[arg1]] : vector<8xi16> to vector<4xi32>
-! CHECK-LLVMIR: %[[varg2:.*]] = llvm.bitcast %[[arg2]] : vector<16xi8> to vector<4xi32>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vsr(%[[varg1]], %[[varg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[res]] : vector<4xi32> to vector<8xi16>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[varg1:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32>
-! CHECK: %[[varg2:.*]] = bitcast <16 x i8> %[[arg2]] to <4 x i32>
-! CHECK: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsr(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
-! CHECK: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <8 x i16>
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[varg1:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32>
+! LLVMIR: %[[varg2:.*]] = bitcast <16 x i8> %[[arg2]] to <4 x i32>
+! LLVMIR: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsr(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
+! LLVMIR: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <8 x i16>
 end subroutine vec_srl_u2u1
 
 ! CHECK-LABEL: vec_srl_u4u1
@@ -1735,25 +784,10 @@ subroutine vec_srl_u4u1(arg1, arg2)
   vector(unsigned(1)) :: arg2
   r = vec_srl(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[bc2:.*]] = vector.bitcast %[[varg2]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vsr(%[[varg1]], %[[bc2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcres:.*]] = vector.bitcast %[[vres]] : vector<4xi32> to vector<4xi32>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcres]] : (vector<4xi32>) -> !fir.vector<4:ui32>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[varg2:.*]] = llvm.bitcast %[[arg2]] : vector<16xi8> to vector<4xi32>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.call @llvm.ppc.altivec.vsr(%[[arg1]], %[[varg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[varg2:.*]] = bitcast <16 x i8> %[[arg2]] to <4 x i32>
-! CHECK: %{{[0-9]+}} = call <4 x i32> @llvm.ppc.altivec.vsr(<4 x i32> %[[arg1]], <4 x i32> %[[varg2]])
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[varg2:.*]] = bitcast <16 x i8> %[[arg2]] to <4 x i32>
+! LLVMIR: %{{[0-9]+}} = call <4 x i32> @llvm.ppc.altivec.vsr(<4 x i32> %[[arg1]], <4 x i32> %[[varg2]])
 end subroutine vec_srl_u4u1
 
 ! CHECK-LABEL: vec_srl_u1u2
@@ -1762,30 +796,12 @@ subroutine vec_srl_u1u2(arg1, arg2)
   vector(unsigned(2)) :: arg2
   r = vec_srl(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[bc1:.*]] = vector.bitcast %[[varg1]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[bc2:.*]] = vector.bitcast %[[varg2]] : vector<8xi16> to vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vsr(%[[bc1]], %[[bc2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcres:.*]] = vector.bitcast %[[vres]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcres]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[varg1:.*]] = llvm.bitcast %[[arg1]] : vector<16xi8> to vector<4xi32>
-! CHECK-LLVMIR: %[[varg2:.*]] = llvm.bitcast %[[arg2]] : vector<8xi16> to vector<4xi32>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vsr(%[[varg1]], %[[varg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-! CHECK-LLVMIR:    %{{[0-9]+}} = llvm.bitcast %[[res]] : vector<4xi32> to vector<16xi8>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[varg1:.*]] = bitcast <16 x i8> %[[arg1]] to <4 x i32>
-! CHECK: %[[varg2:.*]] = bitcast <8 x i16> %[[arg2]] to <4 x i32>
-! CHECK: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsr(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
-! CHECK: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <16 x i8>
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[varg1:.*]] = bitcast <16 x i8> %[[arg1]] to <4 x i32>
+! LLVMIR: %[[varg2:.*]] = bitcast <8 x i16> %[[arg2]] to <4 x i32>
+! LLVMIR: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsr(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
+! LLVMIR: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <16 x i8>
 end subroutine vec_srl_u1u2
 
 ! CHECK-LABEL: vec_srl_u2u2
@@ -1794,30 +810,12 @@ subroutine vec_srl_u2u2(arg1, arg2)
   vector(unsigned(2)) :: arg2
   r = vec_srl(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[bc1:.*]] = vector.bitcast %[[varg1]] : vector<8xi16> to vector<4xi32>
-! CHECK-FIR: %[[bc2:.*]] = vector.bitcast %[[varg2]] : vector<8xi16> to vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vsr(%[[bc1]], %[[bc2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcres:.*]] = vector.bitcast %[[vres]] : vector<4xi32> to vector<8xi16>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcres]] : (vector<8xi16>) -> !fir.vector<8:ui16>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[varg1:.*]] = llvm.bitcast %[[arg1]] : vector<8xi16> to vector<4xi32>
-! CHECK-LLVMIR: %[[varg2:.*]] = llvm.bitcast %[[arg2]] : vector<8xi16> to vector<4xi32>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vsr(%[[varg1]], %[[varg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[res]] : vector<4xi32> to vector<8xi16>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[varg1:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32>
-! CHECK: %[[varg2:.*]] = bitcast <8 x i16> %[[arg2]] to <4 x i32>
-! CHECK: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsr(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
-! CHECK: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <8 x i16>
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[varg1:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32>
+! LLVMIR: %[[varg2:.*]] = bitcast <8 x i16> %[[arg2]] to <4 x i32>
+! LLVMIR: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsr(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
+! LLVMIR: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <8 x i16>
 end subroutine vec_srl_u2u2
 
 ! CHECK-LABEL: vec_srl_u4u2
@@ -1826,25 +824,10 @@ subroutine vec_srl_u4u2(arg1, arg2)
   vector(unsigned(2)) :: arg2
   r = vec_srl(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[bc2:.*]] = vector.bitcast %[[varg2]] : vector<8xi16> to vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vsr(%[[varg1]], %[[bc2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcres:.*]] = vector.bitcast %[[vres]] : vector<4xi32> to vector<4xi32>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcres]] : (vector<4xi32>) -> !fir.vector<4:ui32>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[varg2:.*]] = llvm.bitcast %[[arg2]] : vector<8xi16> to vector<4xi32>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.call @llvm.ppc.altivec.vsr(%[[arg1]], %[[varg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[varg2:.*]] = bitcast <8 x i16> %[[arg2]] to <4 x i32>
-! CHECK: %{{[0-9]+}} = call <4 x i32> @llvm.ppc.altivec.vsr(<4 x i32> %[[arg1]], <4 x i32> %[[varg2]])
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[varg2:.*]] = bitcast <8 x i16> %[[arg2]] to <4 x i32>
+! LLVMIR: %{{[0-9]+}} = call <4 x i32> @llvm.ppc.altivec.vsr(<4 x i32> %[[arg1]], <4 x i32> %[[varg2]])
 end subroutine vec_srl_u4u2
 
 ! CHECK-LABEL: vec_srl_u1u4
@@ -1853,27 +836,11 @@ subroutine vec_srl_u1u4(arg1, arg2)
   vector(unsigned(4)) :: arg2
   r = vec_srl(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[bc1:.*]] = vector.bitcast %[[varg1]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vsr(%[[bc1]], %[[varg2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcres:.*]] = vector.bitcast %[[vres]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcres]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[varg1:.*]] = llvm.bitcast %[[arg1]] : vector<16xi8> to vector<4xi32>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vsr(%[[varg1]], %[[arg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-! CHECK-LLVMIR:    %{{[0-9]+}} = llvm.bitcast %[[res]] : vector<4xi32> to vector<16xi8>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[varg1:.*]] = bitcast <16 x i8> %[[arg1]] to <4 x i32>
-! CHECK: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsr(<4 x i32> %[[varg1]], <4 x i32> %[[arg2]])
-! CHECK: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <16 x i8>
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[varg1:.*]] = bitcast <16 x i8> %[[arg1]] to <4 x i32>
+! LLVMIR: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsr(<4 x i32> %[[varg1]], <4 x i32> %[[arg2]])
+! LLVMIR: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <16 x i8>
 end subroutine vec_srl_u1u4
 
 ! CHECK-LABEL: vec_srl_u2u4
@@ -1882,27 +849,11 @@ subroutine vec_srl_u2u4(arg1, arg2)
   vector(unsigned(4)) :: arg2
   r = vec_srl(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[bc1:.*]] = vector.bitcast %[[varg1]] : vector<8xi16> to vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vsr(%[[bc1]], %[[varg2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcres:.*]] = vector.bitcast %[[vres]] : vector<4xi32> to vector<8xi16>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcres]] : (vector<8xi16>) -> !fir.vector<8:ui16>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[varg1:.*]] = llvm.bitcast %[[arg1]] : vector<8xi16> to vector<4xi32>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vsr(%[[varg1]], %[[arg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[res]] : vector<4xi32> to vector<8xi16>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[varg1:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32>
-! CHECK: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsr(<4 x i32> %[[varg1]], <4 x i32> %[[arg2]])
-! CHECK: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <8 x i16>
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[varg1:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32>
+! LLVMIR: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsr(<4 x i32> %[[varg1]], <4 x i32> %[[arg2]])
+! LLVMIR: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <8 x i16>
 end subroutine vec_srl_u2u4
 
 ! CHECK-LABEL: vec_srl_u4u4
@@ -1911,22 +862,9 @@ subroutine vec_srl_u4u4(arg1, arg2)
   vector(unsigned(4)) :: arg2
   r = vec_srl(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vsr(%[[varg1]], %[[varg2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcres:.*]] = vector.bitcast %[[vres]] : vector<4xi32> to vector<4xi32>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcres]] : (vector<4xi32>) -> !fir.vector<4:ui32>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.call @llvm.ppc.altivec.vsr(%[[arg1]], %[[arg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %{{[0-9]+}} = call <4 x i32> @llvm.ppc.altivec.vsr(<4 x i32> %[[arg1]], <4 x i32> %[[arg2]])
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %{{[0-9]+}} = call <4 x i32> @llvm.ppc.altivec.vsr(<4 x i32> %[[arg1]], <4 x i32> %[[arg2]])
 end subroutine vec_srl_u4u4
 
 !----------------------
@@ -1939,30 +877,12 @@ subroutine vec_sro_i1u1(arg1, arg2)
   vector(unsigned(1)) :: arg2
   r = vec_sro(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[bc1:.*]] = vector.bitcast %[[varg1]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[bc2:.*]] = vector.bitcast %[[varg2]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vsro(%[[bc1]], %[[bc2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcres:.*]] = vector.bitcast %[[vres]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcres]] : (vector<16xi8>) -> !fir.vector<16:i8>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load {{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load {{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[varg1:.*]] = llvm.bitcast %[[arg1]] : vector<16xi8> to vector<4xi32>
-! CHECK-LLVMIR: %[[varg2:.*]] = llvm.bitcast %[[arg2]] : vector<16xi8> to vector<4xi32>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vsro(%[[varg1]], %[[varg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[res]] : vector<4xi32> to vector<16xi8>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[varg1:.*]] = bitcast <16 x i8> %[[arg1]] to <4 x i32>
-! CHECK: %[[varg2:.*]] = bitcast <16 x i8> %[[arg2]] to <4 x i32>
-! CHECK: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsro(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
-! CHECK: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <16 x i8>
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[varg1:.*]] = bitcast <16 x i8> %[[arg1]] to <4 x i32>
+! LLVMIR: %[[varg2:.*]] = bitcast <16 x i8> %[[arg2]] to <4 x i32>
+! LLVMIR: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsro(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
+! LLVMIR: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <16 x i8>
 end subroutine vec_sro_i1u1
 
 ! CHECK-LABEL: vec_sro_i2u1
@@ -1971,30 +891,12 @@ subroutine vec_sro_i2u1(arg1, arg2)
   vector(unsigned(1)) :: arg2
   r = vec_sro(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[bc1:.*]] = vector.bitcast %[[varg1]] : vector<8xi16> to vector<4xi32>
-! CHECK-FIR: %[[bc2:.*]] = vector.bitcast %[[varg2]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vsro(%[[bc1]], %[[bc2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcres:.*]] = vector.bitcast %[[vres]] : vector<4xi32> to vector<8xi16>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcres]] : (vector<8xi16>) -> !fir.vector<8:i16>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load {{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load {{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[varg1:.*]] = llvm.bitcast %[[arg1]] : vector<8xi16> to vector<4xi32>
-! CHECK-LLVMIR: %[[varg2:.*]] = llvm.bitcast %[[arg2]] : vector<16xi8> to vector<4xi32>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vsro(%[[varg1]], %[[varg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[res]] : vector<4xi32> to vector<8xi16>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[varg1:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32>
-! CHECK: %[[varg2:.*]] = bitcast <16 x i8> %[[arg2]] to <4 x i32>
-! CHECK: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsro(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
-! CHECK: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <8 x i16>
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[varg1:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32>
+! LLVMIR: %[[varg2:.*]] = bitcast <16 x i8> %[[arg2]] to <4 x i32>
+! LLVMIR: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsro(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
+! LLVMIR: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <8 x i16>
 end subroutine vec_sro_i2u1
 
 ! CHECK-LABEL: vec_sro_i4u1
@@ -2003,22 +905,10 @@ subroutine vec_sro_i4u1(arg1, arg2)
   vector(unsigned(1)) :: arg2
   r = vec_sro(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[bc2:.*]] = vector.bitcast %[[varg2]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vsro(%[[varg1]], %[[bc2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load {{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load {{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[varg2:.*]] = llvm.bitcast %[[arg2]] : vector<16xi8> to vector<4xi32>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.call @llvm.ppc.altivec.vsro(%[[arg1]], %[[varg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[varg2:.*]] = bitcast <16 x i8> %[[arg2]] to <4 x i32>
-! CHECK: %{{[0-9]+}} = call <4 x i32> @llvm.ppc.altivec.vsro(<4 x i32> %[[arg1]], <4 x i32> %[[varg2]])
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[varg2:.*]] = bitcast <16 x i8> %[[arg2]] to <4 x i32>
+! LLVMIR: %{{[0-9]+}} = call <4 x i32> @llvm.ppc.altivec.vsro(<4 x i32> %[[arg1]], <4 x i32> %[[varg2]])
 end subroutine vec_sro_i4u1
 
 ! CHECK-LABEL: vec_sro_u1u1
@@ -2027,30 +917,12 @@ subroutine vec_sro_u1u1(arg1, arg2)
   vector(unsigned(1)) :: arg2
   r = vec_sro(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[bc1:.*]] = vector.bitcast %[[varg1]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[bc2:.*]] = vector.bitcast %[[varg2]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vsro(%[[bc1]], %[[bc2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcres:.*]] = vector.bitcast %[[vres]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcres]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load {{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load {{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[varg1:.*]] = llvm.bitcast %[[arg1]] : vector<16xi8> to vector<4xi32>
-! CHECK-LLVMIR: %[[varg2:.*]] = llvm.bitcast %[[arg2]] : vector<16xi8> to vector<4xi32>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vsro(%[[varg1]], %[[varg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[res]] : vector<4xi32> to vector<16xi8>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[varg1:.*]] = bitcast <16 x i8> %[[arg1]] to <4 x i32>
-! CHECK: %[[varg2:.*]] = bitcast <16 x i8> %[[arg2]] to <4 x i32>
-! CHECK: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsro(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
-! CHECK: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <16 x i8>
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[varg1:.*]] = bitcast <16 x i8> %[[arg1]] to <4 x i32>
+! LLVMIR: %[[varg2:.*]] = bitcast <16 x i8> %[[arg2]] to <4 x i32>
+! LLVMIR: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsro(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
+! LLVMIR: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <16 x i8>
 end subroutine vec_sro_u1u1
 
 ! CHECK-LABEL: vec_sro_u2u1
@@ -2059,30 +931,12 @@ subroutine vec_sro_u2u1(arg1, arg2)
   vector(unsigned(1)) :: arg2
   r = vec_sro(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[bc1:.*]] = vector.bitcast %[[varg1]] : vector<8xi16> to vector<4xi32>
-! CHECK-FIR: %[[bc2:.*]] = vector.bitcast %[[varg2]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vsro(%[[bc1]], %[[bc2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcres:.*]] = vector.bitcast %[[vres]] : vector<4xi32> to vector<8xi16>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcres]] : (vector<8xi16>) -> !fir.vector<8:ui16>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load {{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load {{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[varg1:.*]] = llvm.bitcast %[[arg1]] : vector<8xi16> to vector<4xi32>
-! CHECK-LLVMIR: %[[varg2:.*]] = llvm.bitcast %[[arg2]] : vector<16xi8> to vector<4xi32>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vsro(%[[varg1]], %[[varg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[res]] : vector<4xi32> to vector<8xi16>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[varg1:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32>
-! CHECK: %[[varg2:.*]] = bitcast <16 x i8> %[[arg2]] to <4 x i32>
-! CHECK: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsro(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
-! CHECK: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <8 x i16>
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[varg1:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32>
+! LLVMIR: %[[varg2:.*]] = bitcast <16 x i8> %[[arg2]] to <4 x i32>
+! LLVMIR: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsro(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
+! LLVMIR: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <8 x i16>
 end subroutine vec_sro_u2u1
 
 ! CHECK-LABEL: vec_sro_u4u1
@@ -2091,25 +945,10 @@ subroutine vec_sro_u4u1(arg1, arg2)
   vector(unsigned(1)) :: arg2
   r = vec_sro(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[bc2:.*]] = vector.bitcast %[[varg2]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vsro(%[[varg1]], %[[bc2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcres:.*]] = vector.bitcast %[[vres]] : vector<4xi32> to vector<4xi32>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcres]] : (vector<4xi32>) -> !fir.vector<4:ui32>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load {{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load {{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[varg2:.*]] = llvm.bitcast %[[arg2]] : vector<16xi8> to vector<4xi32>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.call @llvm.ppc.altivec.vsro(%[[arg1]], %[[varg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[varg2:.*]] = bitcast <16 x i8> %[[arg2]] to <4 x i32>
-! CHECK: %{{[0-9]+}} = call <4 x i32> @llvm.ppc.altivec.vsro(<4 x i32> %[[arg1]], <4 x i32> %[[varg2]])
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[varg2:.*]] = bitcast <16 x i8> %[[arg2]] to <4 x i32>
+! LLVMIR: %{{[0-9]+}} = call <4 x i32> @llvm.ppc.altivec.vsro(<4 x i32> %[[arg1]], <4 x i32> %[[varg2]])
 end subroutine vec_sro_u4u1
 
 ! CHECK-LABEL: vec_sro_r4u1
@@ -2118,30 +957,12 @@ subroutine vec_sro_r4u1(arg1, arg2)
   vector(unsigned(1)) :: arg2
   r = vec_sro(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[bc1:.*]] = vector.bitcast %[[varg1]] : vector<4xf32> to vector<4xi32>
-! CHECK-FIR: %[[bc2:.*]] = vector.bitcast %[[varg2]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vsro(%[[bc1]], %[[bc2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcres:.*]] = vector.bitcast %[[vres]] : vector<4xi32> to vector<4xf32>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcres]] : (vector<4xf32>) -> !fir.vector<4:f32>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load {{.*}} : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load {{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[varg1:.*]] = llvm.bitcast %[[arg1]] : vector<4xf32> to vector<4xi32>
-! CHECK-LLVMIR: %[[varg2:.*]] = llvm.bitcast %[[arg2]] : vector<16xi8> to vector<4xi32>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vsro(%[[varg1]], %[[varg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[res]] : vector<4xi32> to vector<4xf32>
-
-! CHECK: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[varg1:.*]] = bitcast <4 x float> %[[arg1]] to <4 x i32>
-! CHECK: %[[varg2:.*]] = bitcast <16 x i8> %[[arg2]] to <4 x i32>
-! CHECK: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsro(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
-! CHECK: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <4 x float>
+! LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[varg1:.*]] = bitcast <4 x float> %[[arg1]] to <4 x i32>
+! LLVMIR: %[[varg2:.*]] = bitcast <16 x i8> %[[arg2]] to <4 x i32>
+! LLVMIR: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsro(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
+! LLVMIR: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <4 x float>
 end subroutine vec_sro_r4u1
 
 !-------------------------------------
@@ -2152,30 +973,12 @@ subroutine vec_sro_i1u2(arg1, arg2)
   vector(unsigned(2)) :: arg2
   r = vec_sro(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[bc1:.*]] = vector.bitcast %[[varg1]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[bc2:.*]] = vector.bitcast %[[varg2]] : vector<8xi16> to vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vsro(%[[bc1]], %[[bc2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcres:.*]] = vector.bitcast %[[vres]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcres]] : (vector<16xi8>) -> !fir.vector<16:i8>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load {{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load {{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[varg1:.*]] = llvm.bitcast %[[arg1]] : vector<16xi8> to vector<4xi32>
-! CHECK-LLVMIR: %[[varg2:.*]] = llvm.bitcast %[[arg2]] : vector<8xi16> to vector<4xi32>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vsro(%[[varg1]], %[[varg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[res]] : vector<4xi32> to vector<16xi8>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[varg1:.*]] = bitcast <16 x i8> %[[arg1]] to <4 x i32>
-! CHECK: %[[varg2:.*]] = bitcast <8 x i16> %[[arg2]] to <4 x i32>
-! CHECK: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsro(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
-! CHECK: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <16 x i8>
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[varg1:.*]] = bitcast <16 x i8> %[[arg1]] to <4 x i32>
+! LLVMIR: %[[varg2:.*]] = bitcast <8 x i16> %[[arg2]] to <4 x i32>
+! LLVMIR: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsro(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
+! LLVMIR: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <16 x i8>
 end subroutine vec_sro_i1u2
 
 ! CHECK-LABEL: vec_sro_i2u2
@@ -2184,30 +987,12 @@ subroutine vec_sro_i2u2(arg1, arg2)
   vector(unsigned(2)) :: arg2
   r = vec_sro(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[bc1:.*]] = vector.bitcast %[[varg1]] : vector<8xi16> to vector<4xi32>
-! CHECK-FIR: %[[bc2:.*]] = vector.bitcast %[[varg2]] : vector<8xi16> to vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vsro(%[[bc1]], %[[bc2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcres:.*]] = vector.bitcast %[[vres]] : vector<4xi32> to vector<8xi16>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcres]] : (vector<8xi16>) -> !fir.vector<8:i16>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load {{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load {{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[varg1:.*]] = llvm.bitcast %[[arg1]] : vector<8xi16> to vector<4xi32>
-! CHECK-LLVMIR: %[[varg2:.*]] = llvm.bitcast %[[arg2]] : vector<8xi16> to vector<4xi32>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vsro(%[[varg1]], %[[varg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[res]] : vector<4xi32> to vector<8xi16>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[varg1:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32>
-! CHECK: %[[varg2:.*]] = bitcast <8 x i16> %[[arg2]] to <4 x i32>
-! CHECK: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsro(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
-! CHECK: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <8 x i16>
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[varg1:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32>
+! LLVMIR: %[[varg2:.*]] = bitcast <8 x i16> %[[arg2]] to <4 x i32>
+! LLVMIR: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsro(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
+! LLVMIR: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <8 x i16>
 end subroutine vec_sro_i2u2
 
 ! CHECK-LABEL: vec_sro_i4u2
@@ -2216,22 +1001,10 @@ subroutine vec_sro_i4u2(arg1, arg2)
   vector(unsigned(2)) :: arg2
   r = vec_sro(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[bc2:.*]] = vector.bitcast %[[varg2]] : vector<8xi16> to vector<4xi32>
-! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vsro(%[[varg1]], %[[bc2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load {{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load {{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[varg2:.*]] = llvm.bitcast %[[arg2]] : vector<8xi16> to vector<4xi32>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.call @llvm.ppc.altivec.vsro(%[[arg1]], %[[varg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[varg2:.*]] = bitcast <8 x i16> %[[arg2]] to <4 x i32>
-! CHECK: %{{[0-9]+}} = call <4 x i32> @llvm.ppc.altivec.vsro(<4 x i32> %[[arg1]], <4 x i32> %[[varg2]])
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[varg2:.*]] = bitcast <8 x i16> %[[arg2]] to <4 x i32>
+! LLVMIR: %{{[0-9]+}} = call <4 x i32> @llvm.ppc.altivec.vsro(<4 x i32> %[[arg1]], <4 x i32> %[[varg2]])
 end subroutine vec_sro_i4u2
 
 ! CHECK-LABEL: vec_sro_u1u2
@@ -2240,30 +1013,12 @@ subroutine vec_sro_u1u2(arg1, arg2)
   vector(unsigned(2)) :: arg2
   r = vec_sro(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[bc1:.*]] = vector.bitcast %[[varg1]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %[[bc2:.*]] = vector.bitcast %[[varg2]] : vector<8xi16> to vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vsro(%[[bc1]], %[[bc2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcres:.*]] = vector.bitcast %[[vres]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcres]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load {{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load {{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[varg1:.*]] = llvm.bitcast %[[arg1]] : vector<16xi8> to vector<4xi32>
-! CHECK-LLVMIR: %[[varg2:.*]] = llvm.bitcast %[[arg2]] : vector<8xi16> to vector<4xi32>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vsro(%[[varg1]], %[[varg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[res]] : vector<4xi32> to vector<16xi8>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[varg1:.*]] = bitcast <16 x i8> %[[arg1]] to <4 x i32>
-! CHECK: %[[varg2:.*]] = bitcast <8 x i16> %[[arg2]] to <4 x i32>
-! CHECK: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsro(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
-! CHECK: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <16 x i8>
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[varg1:.*]] = bitcast <16 x i8> %[[arg1]] to <4 x i32>
+! LLVMIR: %[[varg2:.*]] = bitcast <8 x i16> %[[arg2]] to <4 x i32>
+! LLVMIR: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsro(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
+! LLVMIR: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <16 x i8>
 end subroutine vec_sro_u1u2
 
 ! CHECK-LABEL: vec_sro_u2u2
@@ -2272,30 +1027,12 @@ subroutine vec_sro_u2u2(arg1, arg2)
   vector(unsigned(2)) :: arg2
   r = vec_sro(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[bc1:.*]] = vector.bitcast %[[varg1]] : vector<8xi16> to vector<4xi32>
-! CHECK-FIR: %[[bc2:.*]] = vector.bitcast %[[varg2]] : vector<8xi16> to vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vsro(%[[bc1]], %[[bc2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcres:.*]] = vector.bitcast %[[vres]] : vector<4xi32> to vector<8xi16>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcres]] : (vector<8xi16>) -> !fir.vector<8:ui16>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load {{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load {{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[varg1:.*]] = llvm.bitcast %[[arg1]] : vector<8xi16> to vector<4xi32>
-! CHECK-LLVMIR: %[[varg2:.*]] = llvm.bitcast %[[arg2]] : vector<8xi16> to vector<4xi32>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vsro(%[[varg1]], %[[varg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[res]] : vector<4xi32> to vector<8xi16>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[varg1:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32>
-! CHECK: %[[varg2:.*]] = bitcast <8 x i16> %[[arg2]] to <4 x i32>
-! CHECK: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsro(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
-! CHECK: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <8 x i16>
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[varg1:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32>
+! LLVMIR: %[[varg2:.*]] = bitcast <8 x i16> %[[arg2]] to <4 x i32>
+! LLVMIR: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsro(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
+! LLVMIR: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <8 x i16>
 
 end subroutine vec_sro_u2u2
 
@@ -2305,25 +1042,10 @@ subroutine vec_sro_u4u2(arg1, arg2)
   vector(unsigned(2)) :: arg2
   r = vec_sro(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[bc2:.*]] = vector.bitcast %[[varg2]] : vector<8xi16> to vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vsro(%[[varg1]], %[[bc2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcres:.*]] = vector.bitcast %[[vres]] : vector<4xi32> to vector<4xi32>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcres]] : (vector<4xi32>) -> !fir.vector<4:ui32>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load {{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load {{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[varg2:.*]] = llvm.bitcast %[[arg2]] : vector<8xi16> to vector<4xi32>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.call @llvm.ppc.altivec.vsro(%[[arg1]], %[[varg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[varg2:.*]] = bitcast <8 x i16> %[[arg2]] to <4 x i32>
-! CHECK: %{{[0-9]+}} = call <4 x i32> @llvm.ppc.altivec.vsro(<4 x i32> %[[arg1]], <4 x i32> %[[varg2]])
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[varg2:.*]] = bitcast <8 x i16> %[[arg2]] to <4 x i32>
+! LLVMIR: %{{[0-9]+}} = call <4 x i32> @llvm.ppc.altivec.vsro(<4 x i32> %[[arg1]], <4 x i32> %[[varg2]])
 end subroutine vec_sro_u4u2
 
 ! CHECK-LABEL: vec_sro_r4u2
@@ -2332,28 +1054,10 @@ subroutine vec_sro_r4u2(arg1, arg2)
   vector(unsigned(2)) :: arg2
   r = vec_sro(arg1, arg2)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[bc1:.*]] = vector.bitcast %[[varg1]] : vector<4xf32> to vector<4xi32>
-! CHECK-FIR: %[[bc2:.*]] = vector.bitcast %[[varg2]] : vector<8xi16> to vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vsro(%[[bc1]], %[[bc2]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcres:.*]] = vector.bitcast %[[vres]] : vector<4xi32> to vector<4xf32>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcres]] : (vector<4xf32>) -> !fir.vector<4:f32>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load {{.*}} : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load {{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[varg1:.*]] = llvm.bitcast %[[arg1]] : vector<4xf32> to vector<4xi32>
-! CHECK-LLVMIR: %[[varg2:.*]] = llvm.bitcast %[[arg2]] : vector<8xi16> to vector<4xi32>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vsro(%[[varg1]], %[[varg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[res]] : vector<4xi32> to vector<4xf32>
-
-! CHECK: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[varg1:.*]] = bitcast <4 x float> %[[arg1]] to <4 x i32>
-! CHECK: %[[varg2:.*]] = bitcast <8 x i16> %[[arg2]] to <4 x i32>
-! CHECK: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsro(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
-! CHECK: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <4 x float>
+! LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[varg1:.*]] = bitcast <4 x float> %[[arg1]] to <4 x i32>
+! LLVMIR: %[[varg2:.*]] = bitcast <8 x i16> %[[arg2]] to <4 x i32>
+! LLVMIR: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsro(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
+! LLVMIR: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <4 x float>
 end subroutine vec_sro_r4u2
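
For readers skimming the patch: every vec_sro variant in this file now checks the same four-step LLVM IR shape: load both operands, bitcast each to <4 x i32>, call @llvm.ppc.altivec.vsro, and bitcast the result back to the first argument's element type. A minimal sketch of a complete post-patch test in the new single-prefix style, assembled from the hunks above and the RUN-line pattern used for ppc-vec-splat.f90 below (the subroutine name is hypothetical):

! RUN: %flang_fc1 -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes="LLVMIR" %s
! REQUIRES: target=powerpc{{.*}}

! CHECK-LABEL: vec_sro_example
subroutine vec_sro_example(arg1, arg2)
  vector(real(4)) :: arg1, r
  vector(unsigned(2)) :: arg2
  r = vec_sro(arg1, arg2)
! Both operands are loaded and normalized to <4 x i32> before the intrinsic call:
! LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
! LLVMIR: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
! LLVMIR: %[[varg1:.*]] = bitcast <4 x float> %[[arg1]] to <4 x i32>
! LLVMIR: %[[varg2:.*]] = bitcast <8 x i16> %[[arg2]] to <4 x i32>
! LLVMIR: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vsro(<4 x i32> %[[varg1]], <4 x i32> %[[varg2]])
! The result is bitcast back to the element type of the first argument:
! LLVMIR: %{{[0-9]+}} = bitcast <4 x i32> %[[res]] to <4 x float>
end subroutine vec_sro_example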

diff  --git a/flang/test/Lower/PowerPC/ppc-vec-splat-elem-order.f90 b/flang/test/Lower/PowerPC/ppc-vec-splat-elem-order.f90
index 23022a5241b6ea1..d95e9828531cd0f 100644
--- a/flang/test/Lower/PowerPC/ppc-vec-splat-elem-order.f90
+++ b/flang/test/Lower/PowerPC/ppc-vec-splat-elem-order.f90
@@ -1,22 +1,10 @@
-! RUN: %flang_fc1 -emit-fir %s -fno-ppc-native-vector-element-order -triple ppc64le-unknown-linux -o - | FileCheck --check-prefixes="FIR" %s
-! RUN: %flang_fc1 -emit-llvm %s -fno-ppc-native-vector-element-order -triple ppc64le-unknown-linux -o - | FileCheck --check-prefixes="LLVMIR" %s
+! RUN: %flang_fc1 -flang-experimental-hlfir -emit-llvm %s -fno-ppc-native-vector-element-order -triple ppc64le-unknown-linux -o - | FileCheck --check-prefixes="LLVMIR" %s
 ! REQUIRES: target=powerpc{{.*}}
 
 ! CHECK-LABEL: vec_splat_testf32i64
 subroutine vec_splat_testf32i64(x)
   vector(real(4)) :: x, y
   y = vec_splat(x, 0_8)
-! FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:f32>>
-! FIR: %[[idx:.*]] = arith.constant 0 : i64
-! FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! FIR: %[[c:.*]] = arith.constant 4 : i64
-! FIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i64
-! FIR: %[[c2:.*]] = arith.constant 3 : i64
-! FIR: %[[sub:.*]] = llvm.sub %[[c2]], %[[u]]  : i64
-! FIR: %[[ele:.*]] = vector.extractelement %[[vx]][%[[sub]] : i64] : vector<4xf32>
-! FIR: %[[vy:.*]] = vector.splat %[[ele]] : vector<4xf32>
-! FIR: %[[y:.*]] = fir.convert %[[vy]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! FIR: fir.store %[[y]] to %{{[0-9]}} : !fir.ref<!fir.vector<4:f32>>
 
 ! LLVMIR: %[[x:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
 ! LLVMIR: %[[ele:.*]] = extractelement <4 x float> %[[x]], i64 3
@@ -29,17 +17,6 @@ end subroutine vec_splat_testf32i64
 subroutine vec_splat_testu8i16(x)
   vector(unsigned(1)) :: x, y
   y = vec_splat(x, 0_2)
-! FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<16:ui8>>
-! FIR: %[[idx:.*]] = arith.constant 0 : i16
-! FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! FIR: %[[c:.*]] = arith.constant 16 : i16
-! FIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i16
-! FIR: %[[c2:.*]] = arith.constant 15 : i16
-! FIR: %[[sub:.*]] = llvm.sub %[[c2]], %[[u]]  : i16
-! FIR: %[[ele:.*]] = vector.extractelement %[[vx]][%[[sub]] : i16] : vector<16xi8>
-! FIR: %[[vy:.*]] = vector.splat %[[ele]] : vector<16xi8>
-! FIR: %[[y:.*]] = fir.convert %[[vy]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-! FIR: fir.store %[[y]] to %{{[0-9]}} : !fir.ref<!fir.vector<16:ui8>>
 
 ! LLVMIR: %[[x:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
 ! LLVMIR: %[[ele:.*]] = extractelement <16 x i8> %[[x]], i16 15
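
From the FIR checks deleted above, the lane selected by vec_splat(x, i) on an n-element vector is mod(i, n) in native element order but (n-1) - mod(i, n) under -fno-ppc-native-vector-element-order, which is where the constant lanes 3 and 15 in the surviving LLVM IR checks come from. A hedged sketch with a nonzero index, extrapolating that formula (the subroutine name and the expected IR line are assumptions, not taken from the test):

! CHECK-LABEL: vec_splat_lane_demo
subroutine vec_splat_lane_demo(x)
  vector(integer(4)) :: x, y
  ! n = 4, so with -fno-ppc-native-vector-element-order the splat
  ! source lane for index 1 is (4-1) - mod(1,4) = 2:
  y = vec_splat(x, 1_4)
! LLVMIR: %[[x:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
! LLVMIR: %[[ele:.*]] = extractelement <4 x i32> %[[x]], i32 2
end subroutine vec_splat_lane_demo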

diff  --git a/flang/test/Lower/PowerPC/ppc-vec-splat.f90 b/flang/test/Lower/PowerPC/ppc-vec-splat.f90
index 92ec128f17b5090..e21555781df29a3 100644
--- a/flang/test/Lower/PowerPC/ppc-vec-splat.f90
+++ b/flang/test/Lower/PowerPC/ppc-vec-splat.f90
@@ -1,6 +1,4 @@
-! RUN: bbc -emit-fir %s -o - | FileCheck --check-prefixes="FIR" %s
-! RUN: %flang_fc1 -emit-fir %s -o - | fir-opt --fir-to-llvm-ir | FileCheck --check-prefixes="MLIR" %s
-! RUN: %flang_fc1 -emit-llvm %s -o - | FileCheck --check-prefixes="CHECK" %s
+! RUN: %flang_fc1 -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes="LLVMIR" %s
 ! REQUIRES: target=powerpc{{.*}}
 
 !----------------
@@ -11,26 +9,6 @@
 subroutine vec_splat_testi8i8(x)
   vector(integer(1)) :: x, y
   y = vec_splat(x, 0_1)
-! FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<16:i8>>
-! FIR: %[[idx:.*]] = arith.constant 0 : i8
-! FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! FIR: %[[c:.*]] = arith.constant 16 : i8
-! FIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i8
-! FIR: %[[ele:.*]] = vector.extractelement %[[vx]][%[[u]] : i8] : vector<16xi8>
-! FIR: %[[vy:.*]] = vector.splat %[[ele]] : vector<16xi8>
-! FIR: %[[y:.*]] = fir.convert %[[vy]] : (vector<16xi8>) -> !fir.vector<16:i8>
-! FIR: fir.store %[[y]] to %{{[0-9]}} : !fir.ref<!fir.vector<16:i8>>
-
-! MLIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<16xi8>>
-! MLIR: %[[idx:.*]] = llvm.mlir.constant(0 : i8) : i8
-! MLIR: %[[c:.*]] = llvm.mlir.constant(16 : i8) : i8
-! MLIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i8
-! MLIR: %[[ele:.*]] = llvm.extractelement %[[x]][%[[u]] : i8] : vector<16xi8>
-! MLIR: %[[undef:.*]] = llvm.mlir.undef : vector<16xi8>
-! MLIR: %[[zero:.*]] = llvm.mlir.constant(0 : i32) : i32
-! MLIR: %[[ins:.*]] = llvm.insertelement %[[ele]], %[[undef]][%[[zero]] : i32] : vector<16xi8>
-! MLIR: %[[y:.*]] = llvm.shufflevector %[[ins]], %[[undef]] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] : vector<16xi8>
-! MLIR: llvm.store %[[y]], %{{[0-9]}} : !llvm.ptr<vector<16xi8>>
 
 ! LLVMIR: %[[x:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
 ! LLVMIR: %[[ele:.*]] = extractelement <16 x i8> %[[x]], i8 0
@@ -43,26 +21,6 @@ end subroutine vec_splat_testi8i8
 subroutine vec_splat_testi8i16(x)
   vector(integer(1)) :: x, y
   y = vec_splat(x, 0_2)
-! FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<16:i8>>
-! FIR: %[[idx:.*]] = arith.constant 0 : i16
-! FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! FIR: %[[c:.*]] = arith.constant 16 : i16
-! FIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i16
-! FIR: %[[ele:.*]] = vector.extractelement %[[vx]][%[[u]] : i16] : vector<16xi8>
-! FIR: %[[vy:.*]] = vector.splat %[[ele]] : vector<16xi8>
-! FIR: %[[y:.*]] = fir.convert %[[vy]] : (vector<16xi8>) -> !fir.vector<16:i8>
-! FIR: fir.store %[[y]] to %{{[0-9]}} : !fir.ref<!fir.vector<16:i8>>
-
-! MLIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<16xi8>>
-! MLIR: %[[idx:.*]] = llvm.mlir.constant(0 : i16) : i16
-! MLIR: %[[c:.*]] = llvm.mlir.constant(16 : i16) : i16
-! MLIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i16
-! MLIR: %[[ele:.*]] = llvm.extractelement %[[x]][%[[u]] : i16] : vector<16xi8>
-! MLIR: %[[undef:.*]] = llvm.mlir.undef : vector<16xi8>
-! MLIR: %[[zero:.*]] = llvm.mlir.constant(0 : i32) : i32
-! MLIR: %[[ins:.*]] = llvm.insertelement %[[ele]], %[[undef]][%[[zero]] : i32] : vector<16xi8>
-! MLIR: %[[y:.*]] = llvm.shufflevector %[[ins]], %[[undef]] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] : vector<16xi8>
-! MLIR: llvm.store %[[y]], %{{[0-9]}} : !llvm.ptr<vector<16xi8>>
 
 ! LLVMIR: %[[x:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
 ! LLVMIR: %[[ele:.*]] = extractelement <16 x i8> %[[x]], i16 0
@@ -75,26 +33,6 @@ end subroutine vec_splat_testi8i16
 subroutine vec_splat_testi8i32(x)
   vector(integer(1)) :: x, y
   y = vec_splat(x, 0_4)
-! FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<16:i8>>
-! FIR: %[[idx:.*]] = arith.constant 0 : i32
-! FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! FIR: %[[c:.*]] = arith.constant 16 : i32
-! FIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i32
-! FIR: %[[ele:.*]] = vector.extractelement %[[vx]][%[[u]] : i32] : vector<16xi8>
-! FIR: %[[vy:.*]] = vector.splat %[[ele]] : vector<16xi8>
-! FIR: %[[y:.*]] = fir.convert %[[vy]] : (vector<16xi8>) -> !fir.vector<16:i8>
-! FIR: fir.store %[[y]] to %{{[0-9]}} : !fir.ref<!fir.vector<16:i8>>
-
-! MLIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<16xi8>>
-! MLIR: %[[idx:.*]] = llvm.mlir.constant(0 : i32) : i32
-! MLIR: %[[c:.*]] = llvm.mlir.constant(16 : i32) : i32
-! MLIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i32
-! MLIR: %[[ele:.*]] = llvm.extractelement %[[x]][%[[u]] : i32] : vector<16xi8>
-! MLIR: %[[undef:.*]] = llvm.mlir.undef : vector<16xi8>
-! MLIR: %[[zero:.*]] = llvm.mlir.constant(0 : i32) : i32
-! MLIR: %[[ins:.*]] = llvm.insertelement %[[ele]], %[[undef]][%[[zero]] : i32] : vector<16xi8>
-! MLIR: %[[y:.*]] = llvm.shufflevector %[[ins]], %[[undef]] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] : vector<16xi8>
-! MLIR: llvm.store %[[y]], %{{[0-9]}} : !llvm.ptr<vector<16xi8>>
 
 ! LLVMIR: %[[x:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
 ! LLVMIR: %[[ele:.*]] = extractelement <16 x i8> %[[x]], i32 0
@@ -107,26 +45,6 @@ end subroutine vec_splat_testi8i32
 subroutine vec_splat_testi8i64(x)
   vector(integer(1)) :: x, y
   y = vec_splat(x, 0_8)
-! FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<16:i8>>
-! FIR: %[[idx:.*]] = arith.constant 0 : i64
-! FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! FIR: %[[c:.*]] = arith.constant 16 : i64
-! FIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i64
-! FIR: %[[ele:.*]] = vector.extractelement %[[vx]][%[[u]] : i64] : vector<16xi8>
-! FIR: %[[vy:.*]] = vector.splat %[[ele]] : vector<16xi8>
-! FIR: %[[y:.*]] = fir.convert %[[vy]] : (vector<16xi8>) -> !fir.vector<16:i8>
-! FIR: fir.store %[[y]] to %{{[0-9]}} : !fir.ref<!fir.vector<16:i8>>
-
-! MLIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<16xi8>>
-! MLIR: %[[idx:.*]] = llvm.mlir.constant(0 : i64) : i64
-! MLIR: %[[c:.*]] = llvm.mlir.constant(16 : i64) : i64
-! MLIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i64
-! MLIR: %[[ele:.*]] = llvm.extractelement %[[x]][%[[u]] : i64] : vector<16xi8>
-! MLIR: %[[undef:.*]] = llvm.mlir.undef : vector<16xi8>
-! MLIR: %[[zero:.*]] = llvm.mlir.constant(0 : i32) : i32
-! MLIR: %[[ins:.*]] = llvm.insertelement %[[ele]], %[[undef]][%[[zero]] : i32] : vector<16xi8>
-! MLIR: %[[y:.*]] = llvm.shufflevector %[[ins]], %[[undef]] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] : vector<16xi8>
-! MLIR: llvm.store %[[y]], %{{[0-9]}} : !llvm.ptr<vector<16xi8>>
 
 ! LLVMIR: %[[x:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
 ! LLVMIR: %[[ele:.*]] = extractelement <16 x i8> %[[x]], i64 0
@@ -139,26 +57,6 @@ end subroutine vec_splat_testi8i64
 subroutine vec_splat_testi16i8(x)
   vector(integer(2)) :: x, y
   y = vec_splat(x, 0_1)
-! FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<8:i16>>
-! FIR: %[[idx:.*]] = arith.constant 0 : i8
-! FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! FIR: %[[c:.*]] = arith.constant 8 : i8
-! FIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i8
-! FIR: %[[ele:.*]] = vector.extractelement %[[vx]][%[[u]] : i8] : vector<8xi16>
-! FIR: %[[vy:.*]] = vector.splat %[[ele]] : vector<8xi16>
-! FIR: %[[y:.*]] = fir.convert %[[vy]] : (vector<8xi16>) -> !fir.vector<8:i16>
-! FIR: fir.store %[[y]] to %{{[0-9]}} : !fir.ref<!fir.vector<8:i16>>
-
-! MLIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<8xi16>>
-! MLIR: %[[idx:.*]] = llvm.mlir.constant(0 : i8) : i8
-! MLIR: %[[c:.*]] = llvm.mlir.constant(8 : i8) : i8
-! MLIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i8
-! MLIR: %[[ele:.*]] = llvm.extractelement %[[x]][%[[u]] : i8] : vector<8xi16>
-! MLIR: %[[undef:.*]] = llvm.mlir.undef : vector<8xi16>
-! MLIR: %[[zero:.*]] = llvm.mlir.constant(0 : i32) : i32
-! MLIR: %[[ins:.*]] = llvm.insertelement %[[ele]], %[[undef]][%[[zero]] : i32] : vector<8xi16>
-! MLIR: %[[y:.*]] = llvm.shufflevector %[[ins]], %[[undef]] [0, 0, 0, 0, 0, 0, 0, 0] : vector<8xi16>
-! MLIR: llvm.store %[[y]], %{{[0-9]}} : !llvm.ptr<vector<8xi16>>
 
 ! LLVMIR: %[[x:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
 ! LLVMIR: %[[ele:.*]] = extractelement <8 x i16> %[[x]], i8 0
@@ -171,26 +69,6 @@ end subroutine vec_splat_testi16i8
 subroutine vec_splat_testi16i16(x)
   vector(integer(2)) :: x, y
   y = vec_splat(x, 0_2)
-! FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<8:i16>>
-! FIR: %[[idx:.*]] = arith.constant 0 : i16
-! FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! FIR: %[[c:.*]] = arith.constant 8 : i16
-! FIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i16
-! FIR: %[[ele:.*]] = vector.extractelement %[[vx]][%[[u]] : i16] : vector<8xi16>
-! FIR: %[[vy:.*]] = vector.splat %[[ele]] : vector<8xi16>
-! FIR: %[[y:.*]] = fir.convert %[[vy]] : (vector<8xi16>) -> !fir.vector<8:i16>
-! FIR: fir.store %[[y]] to %{{[0-9]}} : !fir.ref<!fir.vector<8:i16>>
-
-! MLIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<8xi16>>
-! MLIR: %[[idx:.*]] = llvm.mlir.constant(0 : i16) : i16
-! MLIR: %[[c:.*]] = llvm.mlir.constant(8 : i16) : i16
-! MLIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i16
-! MLIR: %[[ele:.*]] = llvm.extractelement %[[x]][%[[u]] : i16] : vector<8xi16>
-! MLIR: %[[undef:.*]] = llvm.mlir.undef : vector<8xi16>
-! MLIR: %[[zero:.*]] = llvm.mlir.constant(0 : i32) : i32
-! MLIR: %[[ins:.*]] = llvm.insertelement %[[ele]], %[[undef]][%[[zero]] : i32] : vector<8xi16>
-! MLIR: %[[y:.*]] = llvm.shufflevector %[[ins]], %[[undef]] [0, 0, 0, 0, 0, 0, 0, 0] : vector<8xi16>
-! MLIR: llvm.store %[[y]], %{{[0-9]}} : !llvm.ptr<vector<8xi16>>
 
 ! LLVMIR: %[[x:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
 ! LLVMIR: %[[ele:.*]] = extractelement <8 x i16> %[[x]], i16 0
@@ -203,26 +81,6 @@ end subroutine vec_splat_testi16i16
 subroutine vec_splat_testi16i32(x)
   vector(integer(2)) :: x, y
   y = vec_splat(x, 0_4)
-! FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<8:i16>>
-! FIR: %[[idx:.*]] = arith.constant 0 : i32
-! FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! FIR: %[[c:.*]] = arith.constant 8 : i32
-! FIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i32
-! FIR: %[[ele:.*]] = vector.extractelement %[[vx]][%[[u]] : i32] : vector<8xi16>
-! FIR: %[[vy:.*]] = vector.splat %[[ele]] : vector<8xi16>
-! FIR: %[[y:.*]] = fir.convert %[[vy]] : (vector<8xi16>) -> !fir.vector<8:i16>
-! FIR: fir.store %[[y]] to %{{[0-9]}} : !fir.ref<!fir.vector<8:i16>>
-
-! MLIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<8xi16>>
-! MLIR: %[[idx:.*]] = llvm.mlir.constant(0 : i32) : i32
-! MLIR: %[[c:.*]] = llvm.mlir.constant(8 : i32) : i32
-! MLIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i32
-! MLIR: %[[ele:.*]] = llvm.extractelement %[[x]][%[[u]] : i32] : vector<8xi16>
-! MLIR: %[[undef:.*]] = llvm.mlir.undef : vector<8xi16>
-! MLIR: %[[zero:.*]] = llvm.mlir.constant(0 : i32) : i32
-! MLIR: %[[ins:.*]] = llvm.insertelement %[[ele]], %[[undef]][%[[zero]] : i32] : vector<8xi16>
-! MLIR: %[[y:.*]] = llvm.shufflevector %[[ins]], %[[undef]] [0, 0, 0, 0, 0, 0, 0, 0] : vector<8xi16>
-! MLIR: llvm.store %[[y]], %{{[0-9]}} : !llvm.ptr<vector<8xi16>>
 
 ! LLVMIR: %[[x:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
 ! LLVMIR: %[[ele:.*]] = extractelement <8 x i16> %[[x]], i32 0
@@ -235,26 +93,6 @@ end subroutine vec_splat_testi16i32
 subroutine vec_splat_testi16i64(x)
   vector(integer(2)) :: x, y
   y = vec_splat(x, 0_8)
-! FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<8:i16>>
-! FIR: %[[idx:.*]] = arith.constant 0 : i64
-! FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! FIR: %[[c:.*]] = arith.constant 8 : i64
-! FIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i64
-! FIR: %[[ele:.*]] = vector.extractelement %[[vx]][%[[u]] : i64] : vector<8xi16>
-! FIR: %[[vy:.*]] = vector.splat %[[ele]] : vector<8xi16>
-! FIR: %[[y:.*]] = fir.convert %[[vy]] : (vector<8xi16>) -> !fir.vector<8:i16>
-! FIR: fir.store %[[y]] to %{{[0-9]}} : !fir.ref<!fir.vector<8:i16>>
-
-! MLIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<8xi16>>
-! MLIR: %[[idx:.*]] = llvm.mlir.constant(0 : i64) : i64
-! MLIR: %[[c:.*]] = llvm.mlir.constant(8 : i64) : i64
-! MLIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i64
-! MLIR: %[[ele:.*]] = llvm.extractelement %[[x]][%[[u]] : i64] : vector<8xi16>
-! MLIR: %[[undef:.*]] = llvm.mlir.undef : vector<8xi16>
-! MLIR: %[[zero:.*]] = llvm.mlir.constant(0 : i32) : i32
-! MLIR: %[[ins:.*]] = llvm.insertelement %[[ele]], %[[undef]][%[[zero]] : i32] : vector<8xi16>
-! MLIR: %[[y:.*]] = llvm.shufflevector %[[ins]], %[[undef]] [0, 0, 0, 0, 0, 0, 0, 0] : vector<8xi16>
-! MLIR: llvm.store %[[y]], %{{[0-9]}} : !llvm.ptr<vector<8xi16>>
 
 ! LLVMIR: %[[x:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
 ! LLVMIR: %[[ele:.*]] = extractelement <8 x i16> %[[x]], i64 0
@@ -267,26 +105,6 @@ end subroutine vec_splat_testi16i64
 subroutine vec_splat_testi32i8(x)
   vector(integer(4)) :: x, y
   y = vec_splat(x, 0_1)
-! FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:i32>>
-! FIR: %[[idx:.*]] = arith.constant 0 : i8
-! FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! FIR: %[[c:.*]] = arith.constant 4 : i8
-! FIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i8
-! FIR: %[[ele:.*]] = vector.extractelement %[[vx]][%[[u]] : i8] : vector<4xi32>
-! FIR: %[[vy:.*]] = vector.splat %[[ele]] : vector<4xi32>
-! FIR: %[[y:.*]] = fir.convert %[[vy]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! FIR: fir.store %[[y]] to %{{[0-9]}} : !fir.ref<!fir.vector<4:i32>>
-
-! MLIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xi32>>
-! MLIR: %[[idx:.*]] = llvm.mlir.constant(0 : i8) : i8
-! MLIR: %[[c:.*]] = llvm.mlir.constant(4 : i8) : i8
-! MLIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i8
-! MLIR: %[[ele:.*]] = llvm.extractelement %[[x]][%[[u]] : i8] : vector<4xi32>
-! MLIR: %[[undef:.*]] = llvm.mlir.undef : vector<4xi32>
-! MLIR: %[[zero:.*]] = llvm.mlir.constant(0 : i32) : i32
-! MLIR: %[[ins:.*]] = llvm.insertelement %[[ele]], %[[undef]][%[[zero]] : i32] : vector<4xi32>
-! MLIR: %[[y:.*]] = llvm.shufflevector %[[ins]], %[[undef]] [0, 0, 0, 0] : vector<4xi32>
-! MLIR: llvm.store %[[y]], %{{[0-9]}} : !llvm.ptr<vector<4xi32>>
 
 ! LLVMIR: %[[x:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
 ! LLVMIR: %[[ele:.*]] = extractelement <4 x i32> %[[x]], i8 0
@@ -299,26 +117,6 @@ end subroutine vec_splat_testi32i8
 subroutine vec_splat_testi32i16(x)
   vector(integer(4)) :: x, y
   y = vec_splat(x, 0_2)
-! FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:i32>>
-! FIR: %[[idx:.*]] = arith.constant 0 : i16
-! FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! FIR: %[[c:.*]] = arith.constant 4 : i16
-! FIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i16
-! FIR: %[[ele:.*]] = vector.extractelement %[[vx]][%[[u]] : i16] : vector<4xi32>
-! FIR: %[[vy:.*]] = vector.splat %[[ele]] : vector<4xi32>
-! FIR: %[[y:.*]] = fir.convert %[[vy]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! FIR: fir.store %[[y]] to %{{[0-9]}} : !fir.ref<!fir.vector<4:i32>>
-
-! MLIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xi32>>
-! MLIR: %[[idx:.*]] = llvm.mlir.constant(0 : i16) : i16
-! MLIR: %[[c:.*]] = llvm.mlir.constant(4 : i16) : i16
-! MLIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i16
-! MLIR: %[[ele:.*]] = llvm.extractelement %[[x]][%[[u]] : i16] : vector<4xi32>
-! MLIR: %[[undef:.*]] = llvm.mlir.undef : vector<4xi32>
-! MLIR: %[[zero:.*]] = llvm.mlir.constant(0 : i32) : i32
-! MLIR: %[[ins:.*]] = llvm.insertelement %[[ele]], %[[undef]][%[[zero]] : i32] : vector<4xi32>
-! MLIR: %[[y:.*]] = llvm.shufflevector %[[ins]], %[[undef]] [0, 0, 0, 0] : vector<4xi32>
-! MLIR: llvm.store %[[y]], %{{[0-9]}} : !llvm.ptr<vector<4xi32>>
 
 ! LLVMIR: %[[x:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
 ! LLVMIR: %[[ele:.*]] = extractelement <4 x i32> %[[x]], i16 0
@@ -331,26 +129,6 @@ end subroutine vec_splat_testi32i16
 subroutine vec_splat_testi32i32(x)
   vector(integer(4)) :: x, y
   y = vec_splat(x, 0_4)
-! FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:i32>>
-! FIR: %[[idx:.*]] = arith.constant 0 : i32
-! FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! FIR: %[[c:.*]] = arith.constant 4 : i32
-! FIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i32
-! FIR: %[[ele:.*]] = vector.extractelement %[[vx]][%[[u]] : i32] : vector<4xi32>
-! FIR: %[[vy:.*]] = vector.splat %[[ele]] : vector<4xi32>
-! FIR: %[[y:.*]] = fir.convert %[[vy]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! FIR: fir.store %[[y]] to %{{[0-9]}} : !fir.ref<!fir.vector<4:i32>>
-
-! MLIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xi32>>
-! MLIR: %[[idx:.*]] = llvm.mlir.constant(0 : i32) : i32
-! MLIR: %[[c:.*]] = llvm.mlir.constant(4 : i32) : i32
-! MLIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i32
-! MLIR: %[[ele:.*]] = llvm.extractelement %[[x]][%[[u]] : i32] : vector<4xi32>
-! MLIR: %[[undef:.*]] = llvm.mlir.undef : vector<4xi32>
-! MLIR: %[[zero:.*]] = llvm.mlir.constant(0 : i32) : i32
-! MLIR: %[[ins:.*]] = llvm.insertelement %[[ele]], %[[undef]][%[[zero]] : i32] : vector<4xi32>
-! MLIR: %[[y:.*]] = llvm.shufflevector %[[ins]], %[[undef]] [0, 0, 0, 0] : vector<4xi32>
-! MLIR: llvm.store %[[y]], %{{[0-9]}} : !llvm.ptr<vector<4xi32>>
 
 ! LLVMIR: %[[x:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
 ! LLVMIR: %[[ele:.*]] = extractelement <4 x i32> %[[x]], i32 0
@@ -363,26 +141,6 @@ end subroutine vec_splat_testi32i32
 subroutine vec_splat_testi32i64(x)
   vector(integer(4)) :: x, y
   y = vec_splat(x, 0_8)
-! FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:i32>>
-! FIR: %[[idx:.*]] = arith.constant 0 : i64
-! FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! FIR: %[[c:.*]] = arith.constant 4 : i64
-! FIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i64
-! FIR: %[[ele:.*]] = vector.extractelement %[[vx]][%[[u]] : i64] : vector<4xi32>
-! FIR: %[[vy:.*]] = vector.splat %[[ele]] : vector<4xi32>
-! FIR: %[[y:.*]] = fir.convert %[[vy]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! FIR: fir.store %[[y]] to %{{[0-9]}} : !fir.ref<!fir.vector<4:i32>>
-
-! MLIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xi32>>
-! MLIR: %[[idx:.*]] = llvm.mlir.constant(0 : i64) : i64
-! MLIR: %[[c:.*]] = llvm.mlir.constant(4 : i64) : i64
-! MLIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i64
-! MLIR: %[[ele:.*]] = llvm.extractelement %[[x]][%[[u]] : i64] : vector<4xi32>
-! MLIR: %[[undef:.*]] = llvm.mlir.undef : vector<4xi32>
-! MLIR: %[[zero:.*]] = llvm.mlir.constant(0 : i32) : i32
-! MLIR: %[[ins:.*]] = llvm.insertelement %[[ele]], %[[undef]][%[[zero]] : i32] : vector<4xi32>
-! MLIR: %[[y:.*]] = llvm.shufflevector %[[ins]], %[[undef]] [0, 0, 0, 0] : vector<4xi32>
-! MLIR: llvm.store %[[y]], %{{[0-9]}} : !llvm.ptr<vector<4xi32>>
 
 ! LLVMIR: %[[x:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
 ! LLVMIR: %[[ele:.*]] = extractelement <4 x i32> %[[x]], i64 0
@@ -395,26 +153,6 @@ end subroutine vec_splat_testi32i64
 subroutine vec_splat_testi64i8(x)
   vector(integer(8)) :: x, y
   y = vec_splat(x, 0_1)
-! FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<2:i64>>
-! FIR: %[[idx:.*]] = arith.constant 0 : i8
-! FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! FIR: %[[c:.*]] = arith.constant 2 : i8
-! FIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i8
-! FIR: %[[ele:.*]] = vector.extractelement %[[vx]][%[[u]] : i8] : vector<2xi64>
-! FIR: %[[vy:.*]] = vector.splat %[[ele]] : vector<2xi64>
-! FIR: %[[y:.*]] = fir.convert %[[vy]] : (vector<2xi64>) -> !fir.vector<2:i64>
-! FIR: fir.store %[[y]] to %{{[0-9]}} : !fir.ref<!fir.vector<2:i64>>
-
-! MLIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<2xi64>>
-! MLIR: %[[idx:.*]] = llvm.mlir.constant(0 : i8) : i8
-! MLIR: %[[c:.*]] = llvm.mlir.constant(2 : i8) : i8
-! MLIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i8
-! MLIR: %[[ele:.*]] = llvm.extractelement %[[x]][%[[u]] : i8] : vector<2xi64>
-! MLIR: %[[undef:.*]] = llvm.mlir.undef : vector<2xi64>
-! MLIR: %[[zero:.*]] = llvm.mlir.constant(0 : i32) : i32
-! MLIR: %[[ins:.*]] = llvm.insertelement %[[ele]], %[[undef]][%[[zero]] : i32] : vector<2xi64>
-! MLIR: %[[y:.*]] = llvm.shufflevector %[[ins]], %[[undef]] [0, 0] : vector<2xi64>
-! MLIR: llvm.store %[[y]], %{{[0-9]}} : !llvm.ptr<vector<2xi64>>
 
 ! LLVMIR: %[[x:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
 ! LLVMIR: %[[ele:.*]] = extractelement <2 x i64> %[[x]], i8 0
@@ -427,26 +165,6 @@ end subroutine vec_splat_testi64i8
 subroutine vec_splat_testi64i16(x)
   vector(integer(8)) :: x, y
   y = vec_splat(x, 0_2)
-! FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<2:i64>>
-! FIR: %[[idx:.*]] = arith.constant 0 : i16
-! FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! FIR: %[[c:.*]] = arith.constant 2 : i16
-! FIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i16
-! FIR: %[[ele:.*]] = vector.extractelement %[[vx]][%[[u]] : i16] : vector<2xi64>
-! FIR: %[[vy:.*]] = vector.splat %[[ele]] : vector<2xi64>
-! FIR: %[[y:.*]] = fir.convert %[[vy]] : (vector<2xi64>) -> !fir.vector<2:i64>
-! FIR: fir.store %[[y]] to %{{[0-9]}} : !fir.ref<!fir.vector<2:i64>>
-
-! MLIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<2xi64>>
-! MLIR: %[[idx:.*]] = llvm.mlir.constant(0 : i16) : i16
-! MLIR: %[[c:.*]] = llvm.mlir.constant(2 : i16) : i16
-! MLIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i16
-! MLIR: %[[ele:.*]] = llvm.extractelement %[[x]][%[[u]] : i16] : vector<2xi64>
-! MLIR: %[[undef:.*]] = llvm.mlir.undef : vector<2xi64>
-! MLIR: %[[zero:.*]] = llvm.mlir.constant(0 : i32) : i32
-! MLIR: %[[ins:.*]] = llvm.insertelement %[[ele]], %[[undef]][%[[zero]] : i32] : vector<2xi64>
-! MLIR: %[[y:.*]] = llvm.shufflevector %[[ins]], %[[undef]] [0, 0] : vector<2xi64>
-! MLIR: llvm.store %[[y]], %{{[0-9]}} : !llvm.ptr<vector<2xi64>>
 
 ! LLVMIR: %[[x:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
 ! LLVMIR: %[[ele:.*]] = extractelement <2 x i64> %[[x]], i16 0
@@ -459,26 +177,6 @@ end subroutine vec_splat_testi64i16
 subroutine vec_splat_testi64i32(x)
   vector(integer(8)) :: x, y
   y = vec_splat(x, 0_4)
-! FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<2:i64>>
-! FIR: %[[idx:.*]] = arith.constant 0 : i32
-! FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! FIR: %[[c:.*]] = arith.constant 2 : i32
-! FIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i32
-! FIR: %[[ele:.*]] = vector.extractelement %[[vx]][%[[u]] : i32] : vector<2xi64>
-! FIR: %[[vy:.*]] = vector.splat %[[ele]] : vector<2xi64>
-! FIR: %[[y:.*]] = fir.convert %[[vy]] : (vector<2xi64>) -> !fir.vector<2:i64>
-! FIR: fir.store %[[y]] to %{{[0-9]}} : !fir.ref<!fir.vector<2:i64>>
-
-! MLIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<2xi64>>
-! MLIR: %[[idx:.*]] = llvm.mlir.constant(0 : i32) : i32
-! MLIR: %[[c:.*]] = llvm.mlir.constant(2 : i32) : i32
-! MLIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i32
-! MLIR: %[[ele:.*]] = llvm.extractelement %[[x]][%[[u]] : i32] : vector<2xi64>
-! MLIR: %[[undef:.*]] = llvm.mlir.undef : vector<2xi64>
-! MLIR: %[[zero:.*]] = llvm.mlir.constant(0 : i32) : i32
-! MLIR: %[[ins:.*]] = llvm.insertelement %[[ele]], %[[undef]][%[[zero]] : i32] : vector<2xi64>
-! MLIR: %[[y:.*]] = llvm.shufflevector %[[ins]], %[[undef]] [0, 0] : vector<2xi64>
-! MLIR: llvm.store %[[y]], %{{[0-9]}} : !llvm.ptr<vector<2xi64>>
 
 ! LLVMIR: %[[x:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
 ! LLVMIR: %[[ele:.*]] = extractelement <2 x i64> %[[x]], i32 0
@@ -491,26 +189,6 @@ end subroutine vec_splat_testi64i32
 subroutine vec_splat_testi64i64(x)
   vector(integer(8)) :: x, y
   y = vec_splat(x, 0_8)
-! FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<2:i64>>
-! FIR: %[[idx:.*]] = arith.constant 0 : i64
-! FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! FIR: %[[c:.*]] = arith.constant 2 : i64
-! FIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i64
-! FIR: %[[ele:.*]] = vector.extractelement %[[vx]][%[[u]] : i64] : vector<2xi64>
-! FIR: %[[vy:.*]] = vector.splat %[[ele]] : vector<2xi64>
-! FIR: %[[y:.*]] = fir.convert %[[vy]] : (vector<2xi64>) -> !fir.vector<2:i64>
-! FIR: fir.store %[[y]] to %{{[0-9]}} : !fir.ref<!fir.vector<2:i64>>
-
-! MLIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<2xi64>>
-! MLIR: %[[idx:.*]] = llvm.mlir.constant(0 : i64) : i64
-! MLIR: %[[c:.*]] = llvm.mlir.constant(2 : i64) : i64
-! MLIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i64
-! MLIR: %[[ele:.*]] = llvm.extractelement %[[x]][%[[u]] : i64] : vector<2xi64>
-! MLIR: %[[undef:.*]] = llvm.mlir.undef : vector<2xi64>
-! MLIR: %[[zero:.*]] = llvm.mlir.constant(0 : i32) : i32
-! MLIR: %[[ins:.*]] = llvm.insertelement %[[ele]], %[[undef]][%[[zero]] : i32] : vector<2xi64>
-! MLIR: %[[y:.*]] = llvm.shufflevector %[[ins]], %[[undef]] [0, 0] : vector<2xi64>
-! MLIR: llvm.store %[[y]], %{{[0-9]}} : !llvm.ptr<vector<2xi64>>
 
 ! LLVMIR: %[[x:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
 ! LLVMIR: %[[ele:.*]] = extractelement <2 x i64> %[[x]], i64 0
@@ -523,26 +201,6 @@ end subroutine vec_splat_testi64i64
 subroutine vec_splat_testf32i8(x)
   vector(real(4)) :: x, y
   y = vec_splat(x, 0_1)
-! FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:f32>>
-! FIR: %[[idx:.*]] = arith.constant 0 : i8
-! FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! FIR: %[[c:.*]] = arith.constant 4 : i8
-! FIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i8
-! FIR: %[[ele:.*]] = vector.extractelement %[[vx]][%[[u]] : i8] : vector<4xf32>
-! FIR: %[[vy:.*]] = vector.splat %[[ele]] : vector<4xf32>
-! FIR: %[[y:.*]] = fir.convert %[[vy]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! FIR: fir.store %[[y]] to %{{[0-9]}} : !fir.ref<!fir.vector<4:f32>>
-
-! MLIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xf32>>
-! MLIR: %[[idx:.*]] = llvm.mlir.constant(0 : i8) : i8
-! MLIR: %[[c:.*]] = llvm.mlir.constant(4 : i8) : i8
-! MLIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i8
-! MLIR: %[[ele:.*]] = llvm.extractelement %[[x]][%[[u]] : i8] : vector<4xf32>
-! MLIR: %[[undef:.*]] = llvm.mlir.undef : vector<4xf32>
-! MLIR: %[[zero:.*]] = llvm.mlir.constant(0 : i32) : i32
-! MLIR: %[[ins:.*]] = llvm.insertelement %[[ele]], %[[undef]][%[[zero]] : i32] : vector<4xf32>
-! MLIR: %[[y:.*]] = llvm.shufflevector %[[ins]], %[[undef]] [0, 0, 0, 0] : vector<4xf32>
-! MLIR: llvm.store %[[y]], %{{[0-9]}} : !llvm.ptr<vector<4xf32>>
 
 ! LLVMIR: %[[x:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
 ! LLVMIR: %[[ele:.*]] = extractelement <4 x float> %[[x]], i8 0
@@ -555,26 +213,6 @@ end subroutine vec_splat_testf32i8
 subroutine vec_splat_testf32i16(x)
   vector(real(4)) :: x, y
   y = vec_splat(x, 0_2)
-! FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:f32>>
-! FIR: %[[idx:.*]] = arith.constant 0 : i16
-! FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! FIR: %[[c:.*]] = arith.constant 4 : i16
-! FIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i16
-! FIR: %[[ele:.*]] = vector.extractelement %[[vx]][%[[u]] : i16] : vector<4xf32>
-! FIR: %[[vy:.*]] = vector.splat %[[ele]] : vector<4xf32>
-! FIR: %[[y:.*]] = fir.convert %[[vy]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! FIR: fir.store %[[y]] to %{{[0-9]}} : !fir.ref<!fir.vector<4:f32>>
-
-! MLIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xf32>>
-! MLIR: %[[idx:.*]] = llvm.mlir.constant(0 : i16) : i16
-! MLIR: %[[c:.*]] = llvm.mlir.constant(4 : i16) : i16
-! MLIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i16
-! MLIR: %[[ele:.*]] = llvm.extractelement %[[x]][%[[u]] : i16] : vector<4xf32>
-! MLIR: %[[undef:.*]] = llvm.mlir.undef : vector<4xf32>
-! MLIR: %[[zero:.*]] = llvm.mlir.constant(0 : i32) : i32
-! MLIR: %[[ins:.*]] = llvm.insertelement %[[ele]], %[[undef]][%[[zero]] : i32] : vector<4xf32>
-! MLIR: %[[y:.*]] = llvm.shufflevector %[[ins]], %[[undef]] [0, 0, 0, 0] : vector<4xf32>
-! MLIR: llvm.store %[[y]], %{{[0-9]}} : !llvm.ptr<vector<4xf32>>
 
 ! LLVMIR: %[[x:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
 ! LLVMIR: %[[ele:.*]] = extractelement <4 x float> %[[x]], i16 0
@@ -587,26 +225,6 @@ end subroutine vec_splat_testf32i16
 subroutine vec_splat_testf32i32(x)
   vector(real(4)) :: x, y
   y = vec_splat(x, 0_4)
-! FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:f32>>
-! FIR: %[[idx:.*]] = arith.constant 0 : i32
-! FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! FIR: %[[c:.*]] = arith.constant 4 : i32
-! FIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i32
-! FIR: %[[ele:.*]] = vector.extractelement %[[vx]][%[[u]] : i32] : vector<4xf32>
-! FIR: %[[vy:.*]] = vector.splat %[[ele]] : vector<4xf32>
-! FIR: %[[y:.*]] = fir.convert %[[vy]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! FIR: fir.store %[[y]] to %{{[0-9]}} : !fir.ref<!fir.vector<4:f32>>
-
-! MLIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xf32>>
-! MLIR: %[[idx:.*]] = llvm.mlir.constant(0 : i32) : i32
-! MLIR: %[[c:.*]] = llvm.mlir.constant(4 : i32) : i32
-! MLIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i32
-! MLIR: %[[ele:.*]] = llvm.extractelement %[[x]][%[[u]] : i32] : vector<4xf32>
-! MLIR: %[[undef:.*]] = llvm.mlir.undef : vector<4xf32>
-! MLIR: %[[zero:.*]] = llvm.mlir.constant(0 : i32) : i32
-! MLIR: %[[ins:.*]] = llvm.insertelement %[[ele]], %[[undef]][%[[zero]] : i32] : vector<4xf32>
-! MLIR: %[[y:.*]] = llvm.shufflevector %[[ins]], %[[undef]] [0, 0, 0, 0] : vector<4xf32>
-! MLIR: llvm.store %[[y]], %{{[0-9]}} : !llvm.ptr<vector<4xf32>>
 
 ! LLVMIR: %[[x:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
 ! LLVMIR: %[[ele:.*]] = extractelement <4 x float> %[[x]], i32 0
@@ -619,26 +237,6 @@ end subroutine vec_splat_testf32i32
 subroutine vec_splat_testf32i64(x)
   vector(real(4)) :: x, y
   y = vec_splat(x, 0_8)
-! FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:f32>>
-! FIR: %[[idx:.*]] = arith.constant 0 : i64
-! FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! FIR: %[[c:.*]] = arith.constant 4 : i64
-! FIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i64
-! FIR: %[[ele:.*]] = vector.extractelement %[[vx]][%[[u]] : i64] : vector<4xf32>
-! FIR: %[[vy:.*]] = vector.splat %[[ele]] : vector<4xf32>
-! FIR: %[[y:.*]] = fir.convert %[[vy]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! FIR: fir.store %[[y]] to %{{[0-9]}} : !fir.ref<!fir.vector<4:f32>>
-
-! MLIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xf32>>
-! MLIR: %[[idx:.*]] = llvm.mlir.constant(0 : i64) : i64
-! MLIR: %[[c:.*]] = llvm.mlir.constant(4 : i64) : i64
-! MLIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i64
-! MLIR: %[[ele:.*]] = llvm.extractelement %[[x]][%[[u]] : i64] : vector<4xf32>
-! MLIR: %[[undef:.*]] = llvm.mlir.undef : vector<4xf32>
-! MLIR: %[[zero:.*]] = llvm.mlir.constant(0 : i32) : i32
-! MLIR: %[[ins:.*]] = llvm.insertelement %[[ele]], %[[undef]][%[[zero]] : i32] : vector<4xf32>
-! MLIR: %[[y:.*]] = llvm.shufflevector %[[ins]], %[[undef]] [0, 0, 0, 0] : vector<4xf32>
-! MLIR: llvm.store %[[y]], %{{[0-9]}} : !llvm.ptr<vector<4xf32>>
 
 ! LLVMIR: %[[x:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
 ! LLVMIR: %[[ele:.*]] = extractelement <4 x float> %[[x]], i64 0
@@ -651,26 +249,6 @@ end subroutine vec_splat_testf32i64
 subroutine vec_splat_testf64i8(x)
   vector(real(8)) :: x, y
   y = vec_splat(x, 0_1)
-! FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<2:f64>>
-! FIR: %[[idx:.*]] = arith.constant 0 : i8
-! FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! FIR: %[[c:.*]] = arith.constant 2 : i8
-! FIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i8
-! FIR: %[[ele:.*]] = vector.extractelement %[[vx]][%[[u]] : i8] : vector<2xf64>
-! FIR: %[[vy:.*]] = vector.splat %[[ele]] : vector<2xf64>
-! FIR: %[[y:.*]] = fir.convert %[[vy]] : (vector<2xf64>) -> !fir.vector<2:f64>
-! FIR: fir.store %[[y]] to %{{[0-9]}} : !fir.ref<!fir.vector<2:f64>>
-
-! MLIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<2xf64>>
-! MLIR: %[[idx:.*]] = llvm.mlir.constant(0 : i8) : i8
-! MLIR: %[[c:.*]] = llvm.mlir.constant(2 : i8) : i8
-! MLIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i8
-! MLIR: %[[ele:.*]] = llvm.extractelement %[[x]][%[[u]] : i8] : vector<2xf64>
-! MLIR: %[[undef:.*]] = llvm.mlir.undef : vector<2xf64>
-! MLIR: %[[zero:.*]] = llvm.mlir.constant(0 : i32) : i32
-! MLIR: %[[ins:.*]] = llvm.insertelement %[[ele]], %[[undef]][%[[zero]] : i32] : vector<2xf64>
-! MLIR: %[[y:.*]] = llvm.shufflevector %[[ins]], %[[undef]] [0, 0] : vector<2xf64>
-! MLIR: llvm.store %[[y]], %{{[0-9]}} : !llvm.ptr<vector<2xf64>>
 
 ! LLVMIR: %[[x:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
 ! LLVMIR: %[[ele:.*]] = extractelement <2 x double> %[[x]], i8 0
@@ -683,26 +261,6 @@ end subroutine vec_splat_testf64i8
 subroutine vec_splat_testf64i16(x)
   vector(real(8)) :: x, y
   y = vec_splat(x, 0_2)
-! FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<2:f64>>
-! FIR: %[[idx:.*]] = arith.constant 0 : i16
-! FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! FIR: %[[c:.*]] = arith.constant 2 : i16
-! FIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i16
-! FIR: %[[ele:.*]] = vector.extractelement %[[vx]][%[[u]] : i16] : vector<2xf64>
-! FIR: %[[vy:.*]] = vector.splat %[[ele]] : vector<2xf64>
-! FIR: %[[y:.*]] = fir.convert %[[vy]] : (vector<2xf64>) -> !fir.vector<2:f64>
-! FIR: fir.store %[[y]] to %{{[0-9]}} : !fir.ref<!fir.vector<2:f64>>
-
-! MLIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<2xf64>>
-! MLIR: %[[idx:.*]] = llvm.mlir.constant(0 : i16) : i16
-! MLIR: %[[c:.*]] = llvm.mlir.constant(2 : i16) : i16
-! MLIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i16
-! MLIR: %[[ele:.*]] = llvm.extractelement %[[x]][%[[u]] : i16] : vector<2xf64>
-! MLIR: %[[undef:.*]] = llvm.mlir.undef : vector<2xf64>
-! MLIR: %[[zero:.*]] = llvm.mlir.constant(0 : i32) : i32
-! MLIR: %[[ins:.*]] = llvm.insertelement %[[ele]], %[[undef]][%[[zero]] : i32] : vector<2xf64>
-! MLIR: %[[y:.*]] = llvm.shufflevector %[[ins]], %[[undef]] [0, 0] : vector<2xf64>
-! MLIR: llvm.store %[[y]], %{{[0-9]}} : !llvm.ptr<vector<2xf64>>
 
 ! LLVMIR: %[[x:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
 ! LLVMIR: %[[ele:.*]] = extractelement <2 x double> %[[x]], i16 0
@@ -715,26 +273,6 @@ end subroutine vec_splat_testf64i16
 subroutine vec_splat_testf64i32(x)
   vector(real(8)) :: x, y
   y = vec_splat(x, 0_4)
-! FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<2:f64>>
-! FIR: %[[idx:.*]] = arith.constant 0 : i32
-! FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! FIR: %[[c:.*]] = arith.constant 2 : i32
-! FIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i32
-! FIR: %[[ele:.*]] = vector.extractelement %[[vx]][%[[u]] : i32] : vector<2xf64>
-! FIR: %[[vy:.*]] = vector.splat %[[ele]] : vector<2xf64>
-! FIR: %[[y:.*]] = fir.convert %[[vy]] : (vector<2xf64>) -> !fir.vector<2:f64>
-! FIR: fir.store %[[y]] to %{{[0-9]}} : !fir.ref<!fir.vector<2:f64>>
-
-! MLIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<2xf64>>
-! MLIR: %[[idx:.*]] = llvm.mlir.constant(0 : i32) : i32
-! MLIR: %[[c:.*]] = llvm.mlir.constant(2 : i32) : i32
-! MLIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i32
-! MLIR: %[[ele:.*]] = llvm.extractelement %[[x]][%[[u]] : i32] : vector<2xf64>
-! MLIR: %[[undef:.*]] = llvm.mlir.undef : vector<2xf64>
-! MLIR: %[[zero:.*]] = llvm.mlir.constant(0 : i32) : i32
-! MLIR: %[[ins:.*]] = llvm.insertelement %[[ele]], %[[undef]][%[[zero]] : i32] : vector<2xf64>
-! MLIR: %[[y:.*]] = llvm.shufflevector %[[ins]], %[[undef]] [0, 0] : vector<2xf64>
-! MLIR: llvm.store %[[y]], %{{[0-9]}} : !llvm.ptr<vector<2xf64>>
 
 ! LLVMIR: %[[x:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
 ! LLVMIR: %[[ele:.*]] = extractelement <2 x double> %[[x]], i32 0
@@ -747,26 +285,6 @@ end subroutine vec_splat_testf64i32
 subroutine vec_splat_testf64i64(x)
   vector(real(8)) :: x, y
   y = vec_splat(x, 0_8)
-! FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<2:f64>>
-! FIR: %[[idx:.*]] = arith.constant 0 : i64
-! FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! FIR: %[[c:.*]] = arith.constant 2 : i64
-! FIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i64
-! FIR: %[[ele:.*]] = vector.extractelement %[[vx]][%[[u]] : i64] : vector<2xf64>
-! FIR: %[[vy:.*]] = vector.splat %[[ele]] : vector<2xf64>
-! FIR: %[[y:.*]] = fir.convert %[[vy]] : (vector<2xf64>) -> !fir.vector<2:f64>
-! FIR: fir.store %[[y]] to %{{[0-9]}} : !fir.ref<!fir.vector<2:f64>>
-
-! MLIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<2xf64>>
-! MLIR: %[[idx:.*]] = llvm.mlir.constant(0 : i64) : i64
-! MLIR: %[[c:.*]] = llvm.mlir.constant(2 : i64) : i64
-! MLIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i64
-! MLIR: %[[ele:.*]] = llvm.extractelement %[[x]][%[[u]] : i64] : vector<2xf64>
-! MLIR: %[[undef:.*]] = llvm.mlir.undef : vector<2xf64>
-! MLIR: %[[zero:.*]] = llvm.mlir.constant(0 : i32) : i32
-! MLIR: %[[ins:.*]] = llvm.insertelement %[[ele]], %[[undef]][%[[zero]] : i32] : vector<2xf64>
-! MLIR: %[[y:.*]] = llvm.shufflevector %[[ins]], %[[undef]] [0, 0] : vector<2xf64>
-! MLIR: llvm.store %[[y]], %{{[0-9]}} : !llvm.ptr<vector<2xf64>>
 
 ! LLVMIR: %[[x:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
 ! LLVMIR: %[[ele:.*]] = extractelement <2 x double> %[[x]], i64 0
@@ -779,26 +297,6 @@ end subroutine vec_splat_testf64i64
 subroutine vec_splat_testu8i8(x)
   vector(unsigned(1)) :: x, y
   y = vec_splat(x, 0_1)
-! FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<16:ui8>>
-! FIR: %[[idx:.*]] = arith.constant 0 : i8
-! FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! FIR: %[[c:.*]] = arith.constant 16 : i8
-! FIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i8
-! FIR: %[[ele:.*]] = vector.extractelement %[[vx]][%[[u]] : i8] : vector<16xi8>
-! FIR: %[[vy:.*]] = vector.splat %[[ele]] : vector<16xi8>
-! FIR: %[[y:.*]] = fir.convert %[[vy]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-! FIR: fir.store %[[y]] to %{{[0-9]}} : !fir.ref<!fir.vector<16:ui8>>
-
-! MLIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<16xi8>>
-! MLIR: %[[idx:.*]] = llvm.mlir.constant(0 : i8) : i8
-! MLIR: %[[c:.*]] = llvm.mlir.constant(16 : i8) : i8
-! MLIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i8
-! MLIR: %[[ele:.*]] = llvm.extractelement %[[x]][%[[u]] : i8] : vector<16xi8>
-! MLIR: %[[undef:.*]] = llvm.mlir.undef : vector<16xi8>
-! MLIR: %[[zero:.*]] = llvm.mlir.constant(0 : i32) : i32
-! MLIR: %[[ins:.*]] = llvm.insertelement %[[ele]], %[[undef]][%[[zero]] : i32] : vector<16xi8>
-! MLIR: %[[y:.*]] = llvm.shufflevector %[[ins]], %[[undef]] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] : vector<16xi8>
-! MLIR: llvm.store %[[y]], %{{[0-9]}} : !llvm.ptr<vector<16xi8>>
 
 ! LLVMIR: %[[x:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
 ! LLVMIR: %[[ele:.*]] = extractelement <16 x i8> %[[x]], i8 0
@@ -811,26 +309,6 @@ end subroutine vec_splat_testu8i8
 subroutine vec_splat_testu8i16(x)
   vector(unsigned(1)) :: x, y
   y = vec_splat(x, 0_2)
-! FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<16:ui8>>
-! FIR: %[[idx:.*]] = arith.constant 0 : i16
-! FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! FIR: %[[c:.*]] = arith.constant 16 : i16
-! FIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i16
-! FIR: %[[ele:.*]] = vector.extractelement %[[vx]][%[[u]] : i16] : vector<16xi8>
-! FIR: %[[vy:.*]] = vector.splat %[[ele]] : vector<16xi8>
-! FIR: %[[y:.*]] = fir.convert %[[vy]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-! FIR: fir.store %[[y]] to %{{[0-9]}} : !fir.ref<!fir.vector<16:ui8>>
-
-! MLIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<16xi8>>
-! MLIR: %[[idx:.*]] = llvm.mlir.constant(0 : i16) : i16
-! MLIR: %[[c:.*]] = llvm.mlir.constant(16 : i16) : i16
-! MLIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i16
-! MLIR: %[[ele:.*]] = llvm.extractelement %[[x]][%[[u]] : i16] : vector<16xi8>
-! MLIR: %[[undef:.*]] = llvm.mlir.undef : vector<16xi8>
-! MLIR: %[[zero:.*]] = llvm.mlir.constant(0 : i32) : i32
-! MLIR: %[[ins:.*]] = llvm.insertelement %[[ele]], %[[undef]][%[[zero]] : i32] : vector<16xi8>
-! MLIR: %[[y:.*]] = llvm.shufflevector %[[ins]], %[[undef]] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] : vector<16xi8>
-! MLIR: llvm.store %[[y]], %{{[0-9]}} : !llvm.ptr<vector<16xi8>>
 
 ! LLVMIR: %[[x:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
 ! LLVMIR: %[[ele:.*]] = extractelement <16 x i8> %[[x]], i16 0
@@ -843,26 +321,6 @@ end subroutine vec_splat_testu8i16
 subroutine vec_splat_testu8i32(x)
   vector(unsigned(1)) :: x, y
   y = vec_splat(x, 0_4)
-! FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<16:ui8>>
-! FIR: %[[idx:.*]] = arith.constant 0 : i32
-! FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! FIR: %[[c:.*]] = arith.constant 16 : i32
-! FIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i32
-! FIR: %[[ele:.*]] = vector.extractelement %[[vx]][%[[u]] : i32] : vector<16xi8>
-! FIR: %[[vy:.*]] = vector.splat %[[ele]] : vector<16xi8>
-! FIR: %[[y:.*]] = fir.convert %[[vy]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-! FIR: fir.store %[[y]] to %{{[0-9]}} : !fir.ref<!fir.vector<16:ui8>>
-
-! MLIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<16xi8>>
-! MLIR: %[[idx:.*]] = llvm.mlir.constant(0 : i32) : i32
-! MLIR: %[[c:.*]] = llvm.mlir.constant(16 : i32) : i32
-! MLIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i32
-! MLIR: %[[ele:.*]] = llvm.extractelement %[[x]][%[[u]] : i32] : vector<16xi8>
-! MLIR: %[[undef:.*]] = llvm.mlir.undef : vector<16xi8>
-! MLIR: %[[zero:.*]] = llvm.mlir.constant(0 : i32) : i32
-! MLIR: %[[ins:.*]] = llvm.insertelement %[[ele]], %[[undef]][%[[zero]] : i32] : vector<16xi8>
-! MLIR: %[[y:.*]] = llvm.shufflevector %[[ins]], %[[undef]] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] : vector<16xi8>
-! MLIR: llvm.store %[[y]], %{{[0-9]}} : !llvm.ptr<vector<16xi8>>
 
 ! LLVMIR: %[[x:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
 ! LLVMIR: %[[ele:.*]] = extractelement <16 x i8> %[[x]], i32 0
@@ -875,26 +333,6 @@ end subroutine vec_splat_testu8i32
 subroutine vec_splat_testu8i64(x)
   vector(unsigned(1)) :: x, y
   y = vec_splat(x, 0_8)
-! FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<16:ui8>>
-! FIR: %[[idx:.*]] = arith.constant 0 : i64
-! FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! FIR: %[[c:.*]] = arith.constant 16 : i64
-! FIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i64
-! FIR: %[[ele:.*]] = vector.extractelement %[[vx]][%[[u]] : i64] : vector<16xi8>
-! FIR: %[[vy:.*]] = vector.splat %[[ele]] : vector<16xi8>
-! FIR: %[[y:.*]] = fir.convert %[[vy]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-! FIR: fir.store %[[y]] to %{{[0-9]}} : !fir.ref<!fir.vector<16:ui8>>
-
-! MLIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<16xi8>>
-! MLIR: %[[idx:.*]] = llvm.mlir.constant(0 : i64) : i64
-! MLIR: %[[c:.*]] = llvm.mlir.constant(16 : i64) : i64
-! MLIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i64
-! MLIR: %[[ele:.*]] = llvm.extractelement %[[x]][%[[u]] : i64] : vector<16xi8>
-! MLIR: %[[undef:.*]] = llvm.mlir.undef : vector<16xi8>
-! MLIR: %[[zero:.*]] = llvm.mlir.constant(0 : i32) : i32
-! MLIR: %[[ins:.*]] = llvm.insertelement %[[ele]], %[[undef]][%[[zero]] : i32] : vector<16xi8>
-! MLIR: %[[y:.*]] = llvm.shufflevector %[[ins]], %[[undef]] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] : vector<16xi8>
-! MLIR: llvm.store %[[y]], %{{[0-9]}} : !llvm.ptr<vector<16xi8>>
 
 ! LLVMIR: %[[x:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
 ! LLVMIR: %[[ele:.*]] = extractelement <16 x i8> %[[x]], i64 0
@@ -907,26 +345,6 @@ end subroutine vec_splat_testu8i64
 subroutine vec_splat_testu16i8(x)
   vector(unsigned(2)) :: x, y
   y = vec_splat(x, 0_1)
-! FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<8:ui16>>
-! FIR: %[[idx:.*]] = arith.constant 0 : i8
-! FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! FIR: %[[c:.*]] = arith.constant 8 : i8
-! FIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i8
-! FIR: %[[ele:.*]] = vector.extractelement %[[vx]][%[[u]] : i8] : vector<8xi16>
-! FIR: %[[vy:.*]] = vector.splat %[[ele]] : vector<8xi16>
-! FIR: %[[y:.*]] = fir.convert %[[vy]] : (vector<8xi16>) -> !fir.vector<8:ui16>
-! FIR: fir.store %[[y]] to %{{[0-9]}} : !fir.ref<!fir.vector<8:ui16>>
-
-! MLIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<8xi16>>
-! MLIR: %[[idx:.*]] = llvm.mlir.constant(0 : i8) : i8
-! MLIR: %[[c:.*]] = llvm.mlir.constant(8 : i8) : i8
-! MLIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i8
-! MLIR: %[[ele:.*]] = llvm.extractelement %[[x]][%[[u]] : i8] : vector<8xi16>
-! MLIR: %[[undef:.*]] = llvm.mlir.undef : vector<8xi16>
-! MLIR: %[[zero:.*]] = llvm.mlir.constant(0 : i32) : i32
-! MLIR: %[[ins:.*]] = llvm.insertelement %[[ele]], %[[undef]][%[[zero]] : i32] : vector<8xi16>
-! MLIR: %[[y:.*]] = llvm.shufflevector %[[ins]], %[[undef]] [0, 0, 0, 0, 0, 0, 0, 0] : vector<8xi16>
-! MLIR: llvm.store %[[y]], %{{[0-9]}} : !llvm.ptr<vector<8xi16>>
 
 ! LLVMIR: %[[x:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
 ! LLVMIR: %[[ele:.*]] = extractelement <8 x i16> %[[x]], i8 0
@@ -939,26 +357,6 @@ end subroutine vec_splat_testu16i8
 subroutine vec_splat_testu16i16(x)
   vector(unsigned(2)) :: x, y
   y = vec_splat(x, 0_2)
-! FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<8:ui16>>
-! FIR: %[[idx:.*]] = arith.constant 0 : i16
-! FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! FIR: %[[c:.*]] = arith.constant 8 : i16
-! FIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i16
-! FIR: %[[ele:.*]] = vector.extractelement %[[vx]][%[[u]] : i16] : vector<8xi16>
-! FIR: %[[vy:.*]] = vector.splat %[[ele]] : vector<8xi16>
-! FIR: %[[y:.*]] = fir.convert %[[vy]] : (vector<8xi16>) -> !fir.vector<8:ui16>
-! FIR: fir.store %[[y]] to %{{[0-9]}} : !fir.ref<!fir.vector<8:ui16>>
-
-! MLIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<8xi16>>
-! MLIR: %[[idx:.*]] = llvm.mlir.constant(0 : i16) : i16
-! MLIR: %[[c:.*]] = llvm.mlir.constant(8 : i16) : i16
-! MLIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i16
-! MLIR: %[[ele:.*]] = llvm.extractelement %[[x]][%[[u]] : i16] : vector<8xi16>
-! MLIR: %[[undef:.*]] = llvm.mlir.undef : vector<8xi16>
-! MLIR: %[[zero:.*]] = llvm.mlir.constant(0 : i32) : i32
-! MLIR: %[[ins:.*]] = llvm.insertelement %[[ele]], %[[undef]][%[[zero]] : i32] : vector<8xi16>
-! MLIR: %[[y:.*]] = llvm.shufflevector %[[ins]], %[[undef]] [0, 0, 0, 0, 0, 0, 0, 0] : vector<8xi16>
-! MLIR: llvm.store %[[y]], %{{[0-9]}} : !llvm.ptr<vector<8xi16>>
 
 ! LLVMIR: %[[x:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
 ! LLVMIR: %[[ele:.*]] = extractelement <8 x i16> %[[x]], i16 0
@@ -971,26 +369,6 @@ end subroutine vec_splat_testu16i16
 subroutine vec_splat_testu16i32(x)
   vector(unsigned(2)) :: x, y
   y = vec_splat(x, 0_4)
-! FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<8:ui16>>
-! FIR: %[[idx:.*]] = arith.constant 0 : i32
-! FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! FIR: %[[c:.*]] = arith.constant 8 : i32
-! FIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i32
-! FIR: %[[ele:.*]] = vector.extractelement %[[vx]][%[[u]] : i32] : vector<8xi16>
-! FIR: %[[vy:.*]] = vector.splat %[[ele]] : vector<8xi16>
-! FIR: %[[y:.*]] = fir.convert %[[vy]] : (vector<8xi16>) -> !fir.vector<8:ui16>
-! FIR: fir.store %[[y]] to %{{[0-9]}} : !fir.ref<!fir.vector<8:ui16>>
-
-! MLIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<8xi16>>
-! MLIR: %[[idx:.*]] = llvm.mlir.constant(0 : i32) : i32
-! MLIR: %[[c:.*]] = llvm.mlir.constant(8 : i32) : i32
-! MLIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i32
-! MLIR: %[[ele:.*]] = llvm.extractelement %[[x]][%[[u]] : i32] : vector<8xi16>
-! MLIR: %[[undef:.*]] = llvm.mlir.undef : vector<8xi16>
-! MLIR: %[[zero:.*]] = llvm.mlir.constant(0 : i32) : i32
-! MLIR: %[[ins:.*]] = llvm.insertelement %[[ele]], %[[undef]][%[[zero]] : i32] : vector<8xi16>
-! MLIR: %[[y:.*]] = llvm.shufflevector %[[ins]], %[[undef]] [0, 0, 0, 0, 0, 0, 0, 0] : vector<8xi16>
-! MLIR: llvm.store %[[y]], %{{[0-9]}} : !llvm.ptr<vector<8xi16>>
 
 ! LLVMIR: %[[x:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
 ! LLVMIR: %[[ele:.*]] = extractelement <8 x i16> %[[x]], i32 0
@@ -1003,26 +381,6 @@ end subroutine vec_splat_testu16i32
 subroutine vec_splat_testu16i64(x)
   vector(unsigned(2)) :: x, y
   y = vec_splat(x, 0_8)
-! FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<8:ui16>>
-! FIR: %[[idx:.*]] = arith.constant 0 : i64
-! FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! FIR: %[[c:.*]] = arith.constant 8 : i64
-! FIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i64
-! FIR: %[[ele:.*]] = vector.extractelement %[[vx]][%[[u]] : i64] : vector<8xi16>
-! FIR: %[[vy:.*]] = vector.splat %[[ele]] : vector<8xi16>
-! FIR: %[[y:.*]] = fir.convert %[[vy]] : (vector<8xi16>) -> !fir.vector<8:ui16>
-! FIR: fir.store %[[y]] to %{{[0-9]}} : !fir.ref<!fir.vector<8:ui16>>
-
-! MLIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<8xi16>>
-! MLIR: %[[idx:.*]] = llvm.mlir.constant(0 : i64) : i64
-! MLIR: %[[c:.*]] = llvm.mlir.constant(8 : i64) : i64
-! MLIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i64
-! MLIR: %[[ele:.*]] = llvm.extractelement %[[x]][%[[u]] : i64] : vector<8xi16>
-! MLIR: %[[undef:.*]] = llvm.mlir.undef : vector<8xi16>
-! MLIR: %[[zero:.*]] = llvm.mlir.constant(0 : i32) : i32
-! MLIR: %[[ins:.*]] = llvm.insertelement %[[ele]], %[[undef]][%[[zero]] : i32] : vector<8xi16>
-! MLIR: %[[y:.*]] = llvm.shufflevector %[[ins]], %[[undef]] [0, 0, 0, 0, 0, 0, 0, 0] : vector<8xi16>
-! MLIR: llvm.store %[[y]], %{{[0-9]}} : !llvm.ptr<vector<8xi16>>
 
 ! LLVMIR: %[[x:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
 ! LLVMIR: %[[ele:.*]] = extractelement <8 x i16> %[[x]], i64 0
@@ -1035,26 +393,6 @@ end subroutine vec_splat_testu16i64
 subroutine vec_splat_testu32i8(x)
   vector(unsigned(4)) :: x, y
   y = vec_splat(x, 0_1)
-! FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:ui32>>
-! FIR: %[[idx:.*]] = arith.constant 0 : i8
-! FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! FIR: %[[c:.*]] = arith.constant 4 : i8
-! FIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i8
-! FIR: %[[ele:.*]] = vector.extractelement %[[vx]][%[[u]] : i8] : vector<4xi32>
-! FIR: %[[vy:.*]] = vector.splat %[[ele]] : vector<4xi32>
-! FIR: %[[y:.*]] = fir.convert %[[vy]] : (vector<4xi32>) -> !fir.vector<4:ui32>
-! FIR: fir.store %[[y]] to %{{[0-9]}} : !fir.ref<!fir.vector<4:ui32>>
-
-! MLIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xi32>>
-! MLIR: %[[idx:.*]] = llvm.mlir.constant(0 : i8) : i8
-! MLIR: %[[c:.*]] = llvm.mlir.constant(4 : i8) : i8
-! MLIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i8
-! MLIR: %[[ele:.*]] = llvm.extractelement %[[x]][%[[u]] : i8] : vector<4xi32>
-! MLIR: %[[undef:.*]] = llvm.mlir.undef : vector<4xi32>
-! MLIR: %[[zero:.*]] = llvm.mlir.constant(0 : i32) : i32
-! MLIR: %[[ins:.*]] = llvm.insertelement %[[ele]], %[[undef]][%[[zero]] : i32] : vector<4xi32>
-! MLIR: %[[y:.*]] = llvm.shufflevector %[[ins]], %[[undef]] [0, 0, 0, 0] : vector<4xi32>
-! MLIR: llvm.store %[[y]], %{{[0-9]}} : !llvm.ptr<vector<4xi32>>
 
 ! LLVMIR: %[[x:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
 ! LLVMIR: %[[ele:.*]] = extractelement <4 x i32> %[[x]], i8 0
@@ -1067,26 +405,6 @@ end subroutine vec_splat_testu32i8
 subroutine vec_splat_testu32i16(x)
   vector(unsigned(4)) :: x, y
   y = vec_splat(x, 0_2)
-! FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:ui32>>
-! FIR: %[[idx:.*]] = arith.constant 0 : i16
-! FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! FIR: %[[c:.*]] = arith.constant 4 : i16
-! FIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i16
-! FIR: %[[ele:.*]] = vector.extractelement %[[vx]][%[[u]] : i16] : vector<4xi32>
-! FIR: %[[vy:.*]] = vector.splat %[[ele]] : vector<4xi32>
-! FIR: %[[y:.*]] = fir.convert %[[vy]] : (vector<4xi32>) -> !fir.vector<4:ui32>
-! FIR: fir.store %[[y]] to %{{[0-9]}} : !fir.ref<!fir.vector<4:ui32>>
-
-! MLIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xi32>>
-! MLIR: %[[idx:.*]] = llvm.mlir.constant(0 : i16) : i16
-! MLIR: %[[c:.*]] = llvm.mlir.constant(4 : i16) : i16
-! MLIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i16
-! MLIR: %[[ele:.*]] = llvm.extractelement %[[x]][%[[u]] : i16] : vector<4xi32>
-! MLIR: %[[undef:.*]] = llvm.mlir.undef : vector<4xi32>
-! MLIR: %[[zero:.*]] = llvm.mlir.constant(0 : i32) : i32
-! MLIR: %[[ins:.*]] = llvm.insertelement %[[ele]], %[[undef]][%[[zero]] : i32] : vector<4xi32>
-! MLIR: %[[y:.*]] = llvm.shufflevector %[[ins]], %[[undef]] [0, 0, 0, 0] : vector<4xi32>
-! MLIR: llvm.store %[[y]], %{{[0-9]}} : !llvm.ptr<vector<4xi32>>
 
 ! LLVMIR: %[[x:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
 ! LLVMIR: %[[ele:.*]] = extractelement <4 x i32> %[[x]], i16 0
@@ -1099,26 +417,6 @@ end subroutine vec_splat_testu32i16
 subroutine vec_splat_testu32i32(x)
   vector(unsigned(4)) :: x, y
   y = vec_splat(x, 0_4)
-! FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:ui32>>
-! FIR: %[[idx:.*]] = arith.constant 0 : i32
-! FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! FIR: %[[c:.*]] = arith.constant 4 : i32
-! FIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i32
-! FIR: %[[ele:.*]] = vector.extractelement %[[vx]][%[[u]] : i32] : vector<4xi32>
-! FIR: %[[vy:.*]] = vector.splat %[[ele]] : vector<4xi32>
-! FIR: %[[y:.*]] = fir.convert %[[vy]] : (vector<4xi32>) -> !fir.vector<4:ui32>
-! FIR: fir.store %[[y]] to %{{[0-9]}} : !fir.ref<!fir.vector<4:ui32>>
-
-! MLIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xi32>>
-! MLIR: %[[idx:.*]] = llvm.mlir.constant(0 : i32) : i32
-! MLIR: %[[c:.*]] = llvm.mlir.constant(4 : i32) : i32
-! MLIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i32
-! MLIR: %[[ele:.*]] = llvm.extractelement %[[x]][%[[u]] : i32] : vector<4xi32>
-! MLIR: %[[undef:.*]] = llvm.mlir.undef : vector<4xi32>
-! MLIR: %[[zero:.*]] = llvm.mlir.constant(0 : i32) : i32
-! MLIR: %[[ins:.*]] = llvm.insertelement %[[ele]], %[[undef]][%[[zero]] : i32] : vector<4xi32>
-! MLIR: %[[y:.*]] = llvm.shufflevector %[[ins]], %[[undef]] [0, 0, 0, 0] : vector<4xi32>
-! MLIR: llvm.store %[[y]], %{{[0-9]}} : !llvm.ptr<vector<4xi32>>
 
 ! LLVMIR: %[[x:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
 ! LLVMIR: %[[ele:.*]] = extractelement <4 x i32> %[[x]], i32 0
@@ -1131,26 +429,6 @@ end subroutine vec_splat_testu32i32
 subroutine vec_splat_testu32i64(x)
   vector(unsigned(4)) :: x, y
   y = vec_splat(x, 0_8)
-! FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:ui32>>
-! FIR: %[[idx:.*]] = arith.constant 0 : i64
-! FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! FIR: %[[c:.*]] = arith.constant 4 : i64
-! FIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i64
-! FIR: %[[ele:.*]] = vector.extractelement %[[vx]][%[[u]] : i64] : vector<4xi32>
-! FIR: %[[vy:.*]] = vector.splat %[[ele]] : vector<4xi32>
-! FIR: %[[y:.*]] = fir.convert %[[vy]] : (vector<4xi32>) -> !fir.vector<4:ui32>
-! FIR: fir.store %[[y]] to %{{[0-9]}} : !fir.ref<!fir.vector<4:ui32>>
-
-! MLIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xi32>>
-! MLIR: %[[idx:.*]] = llvm.mlir.constant(0 : i64) : i64
-! MLIR: %[[c:.*]] = llvm.mlir.constant(4 : i64) : i64
-! MLIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i64
-! MLIR: %[[ele:.*]] = llvm.extractelement %[[x]][%[[u]] : i64] : vector<4xi32>
-! MLIR: %[[undef:.*]] = llvm.mlir.undef : vector<4xi32>
-! MLIR: %[[zero:.*]] = llvm.mlir.constant(0 : i32) : i32
-! MLIR: %[[ins:.*]] = llvm.insertelement %[[ele]], %[[undef]][%[[zero]] : i32] : vector<4xi32>
-! MLIR: %[[y:.*]] = llvm.shufflevector %[[ins]], %[[undef]] [0, 0, 0, 0] : vector<4xi32>
-! MLIR: llvm.store %[[y]], %{{[0-9]}} : !llvm.ptr<vector<4xi32>>
 
 ! LLVMIR: %[[x:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
 ! LLVMIR: %[[ele:.*]] = extractelement <4 x i32> %[[x]], i64 0
@@ -1163,26 +441,6 @@ end subroutine vec_splat_testu32i64
 subroutine vec_splat_testu64i8(x)
   vector(unsigned(8)) :: x, y
   y = vec_splat(x, 0_1)
-! FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<2:ui64>>
-! FIR: %[[idx:.*]] = arith.constant 0 : i8
-! FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! FIR: %[[c:.*]] = arith.constant 2 : i8
-! FIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i8
-! FIR: %[[ele:.*]] = vector.extractelement %[[vx]][%[[u]] : i8] : vector<2xi64>
-! FIR: %[[vy:.*]] = vector.splat %[[ele]] : vector<2xi64>
-! FIR: %[[y:.*]] = fir.convert %[[vy]] : (vector<2xi64>) -> !fir.vector<2:ui64>
-! FIR: fir.store %[[y]] to %{{[0-9]}} : !fir.ref<!fir.vector<2:ui64>>
-
-! MLIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<2xi64>>
-! MLIR: %[[idx:.*]] = llvm.mlir.constant(0 : i8) : i8
-! MLIR: %[[c:.*]] = llvm.mlir.constant(2 : i8) : i8
-! MLIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i8
-! MLIR: %[[ele:.*]] = llvm.extractelement %[[x]][%[[u]] : i8] : vector<2xi64>
-! MLIR: %[[undef:.*]] = llvm.mlir.undef : vector<2xi64>
-! MLIR: %[[zero:.*]] = llvm.mlir.constant(0 : i32) : i32
-! MLIR: %[[ins:.*]] = llvm.insertelement %[[ele]], %[[undef]][%[[zero]] : i32] : vector<2xi64>
-! MLIR: %[[y:.*]] = llvm.shufflevector %[[ins]], %[[undef]] [0, 0] : vector<2xi64>
-! MLIR: llvm.store %[[y]], %{{[0-9]}} : !llvm.ptr<vector<2xi64>>
 
 ! LLVMIR: %[[x:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
 ! LLVMIR: %[[ele:.*]] = extractelement <2 x i64> %[[x]], i8 0
@@ -1195,26 +453,6 @@ end subroutine vec_splat_testu64i8
 subroutine vec_splat_testu64i16(x)
   vector(unsigned(8)) :: x, y
   y = vec_splat(x, 0_2)
-! FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<2:ui64>>
-! FIR: %[[idx:.*]] = arith.constant 0 : i16
-! FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! FIR: %[[c:.*]] = arith.constant 2 : i16
-! FIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i16
-! FIR: %[[ele:.*]] = vector.extractelement %[[vx]][%[[u]] : i16] : vector<2xi64>
-! FIR: %[[vy:.*]] = vector.splat %[[ele]] : vector<2xi64>
-! FIR: %[[y:.*]] = fir.convert %[[vy]] : (vector<2xi64>) -> !fir.vector<2:ui64>
-! FIR: fir.store %[[y]] to %{{[0-9]}} : !fir.ref<!fir.vector<2:ui64>>
-
-! MLIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<2xi64>>
-! MLIR: %[[idx:.*]] = llvm.mlir.constant(0 : i16) : i16
-! MLIR: %[[c:.*]] = llvm.mlir.constant(2 : i16) : i16
-! MLIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i16
-! MLIR: %[[ele:.*]] = llvm.extractelement %[[x]][%[[u]] : i16] : vector<2xi64>
-! MLIR: %[[undef:.*]] = llvm.mlir.undef : vector<2xi64>
-! MLIR: %[[zero:.*]] = llvm.mlir.constant(0 : i32) : i32
-! MLIR: %[[ins:.*]] = llvm.insertelement %[[ele]], %[[undef]][%[[zero]] : i32] : vector<2xi64>
-! MLIR: %[[y:.*]] = llvm.shufflevector %[[ins]], %[[undef]] [0, 0] : vector<2xi64>
-! MLIR: llvm.store %[[y]], %{{[0-9]}} : !llvm.ptr<vector<2xi64>>
 
 ! LLVMIR: %[[x:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
 ! LLVMIR: %[[ele:.*]] = extractelement <2 x i64> %[[x]], i16 0
@@ -1227,26 +465,6 @@ end subroutine vec_splat_testu64i16
 subroutine vec_splat_testu64i32(x)
   vector(unsigned(8)) :: x, y
   y = vec_splat(x, 0_4)
-! FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<2:ui64>>
-! FIR: %[[idx:.*]] = arith.constant 0 : i32
-! FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! FIR: %[[c:.*]] = arith.constant 2 : i32
-! FIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i32
-! FIR: %[[ele:.*]] = vector.extractelement %[[vx]][%[[u]] : i32] : vector<2xi64>
-! FIR: %[[vy:.*]] = vector.splat %[[ele]] : vector<2xi64>
-! FIR: %[[y:.*]] = fir.convert %[[vy]] : (vector<2xi64>) -> !fir.vector<2:ui64>
-! FIR: fir.store %[[y]] to %{{[0-9]}} : !fir.ref<!fir.vector<2:ui64>>
-
-! MLIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<2xi64>>
-! MLIR: %[[idx:.*]] = llvm.mlir.constant(0 : i32) : i32
-! MLIR: %[[c:.*]] = llvm.mlir.constant(2 : i32) : i32
-! MLIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i32
-! MLIR: %[[ele:.*]] = llvm.extractelement %[[x]][%[[u]] : i32] : vector<2xi64>
-! MLIR: %[[undef:.*]] = llvm.mlir.undef : vector<2xi64>
-! MLIR: %[[zero:.*]] = llvm.mlir.constant(0 : i32) : i32
-! MLIR: %[[ins:.*]] = llvm.insertelement %[[ele]], %[[undef]][%[[zero]] : i32] : vector<2xi64>
-! MLIR: %[[y:.*]] = llvm.shufflevector %[[ins]], %[[undef]] [0, 0] : vector<2xi64>
-! MLIR: llvm.store %[[y]], %{{[0-9]}} : !llvm.ptr<vector<2xi64>>
 
 ! LLVMIR: %[[x:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
 ! LLVMIR: %[[ele:.*]] = extractelement <2 x i64> %[[x]], i32 0
@@ -1259,26 +477,6 @@ end subroutine vec_splat_testu64i32
 subroutine vec_splat_testu64i64(x)
   vector(unsigned(8)) :: x, y
   y = vec_splat(x, 0_8)
-! FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<2:ui64>>
-! FIR: %[[idx:.*]] = arith.constant 0 : i64
-! FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! FIR: %[[c:.*]] = arith.constant 2 : i64
-! FIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i64
-! FIR: %[[ele:.*]] = vector.extractelement %[[vx]][%[[u]] : i64] : vector<2xi64>
-! FIR: %[[vy:.*]] = vector.splat %[[ele]] : vector<2xi64>
-! FIR: %[[y:.*]] = fir.convert %[[vy]] : (vector<2xi64>) -> !fir.vector<2:ui64>
-! FIR: fir.store %[[y]] to %{{[0-9]}} : !fir.ref<!fir.vector<2:ui64>>
-
-! MLIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<2xi64>>
-! MLIR: %[[idx:.*]] = llvm.mlir.constant(0 : i64) : i64
-! MLIR: %[[c:.*]] = llvm.mlir.constant(2 : i64) : i64
-! MLIR: %[[u:.*]] = llvm.urem %[[idx]], %[[c]]  : i64
-! MLIR: %[[ele:.*]] = llvm.extractelement %[[x]][%[[u]] : i64] : vector<2xi64>
-! MLIR: %[[undef:.*]] = llvm.mlir.undef : vector<2xi64>
-! MLIR: %[[zero:.*]] = llvm.mlir.constant(0 : i32) : i32
-! MLIR: %[[ins:.*]] = llvm.insertelement %[[ele]], %[[undef]][%[[zero]] : i32] : vector<2xi64>
-! MLIR: %[[y:.*]] = llvm.shufflevector %[[ins]], %[[undef]] [0, 0] : vector<2xi64>
-! MLIR: llvm.store %[[y]], %{{[0-9]}} : !llvm.ptr<vector<2xi64>>
 
 ! LLVMIR: %[[x:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
 ! LLVMIR: %[[ele:.*]] = extractelement <2 x i64> %[[x]], i64 0
@@ -1296,17 +494,6 @@ subroutine vec_splats_testi8(x)
   integer(1) :: x
   vector(integer(1)) :: y
   y = vec_splats(x)
-! FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<i8>
-! FIR: %[[vy:.*]] = vector.splat %[[x]] : vector<16xi8>
-! FIR: %[[y:.*]] = fir.convert %[[vy]] : (vector<16xi8>) -> !fir.vector<16:i8>
-! FIR: fir.store %[[y]] to %{{[0-9]}} : !fir.ref<!fir.vector<16:i8>>
-
-! MLIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<i8>
-! MLIR: %[[undef:.*]] = llvm.mlir.undef : vector<16xi8>
-! MLIR: %[[zero:.*]] = llvm.mlir.constant(0 : i32) : i32
-! MLIR: %[[ins:.*]] = llvm.insertelement %[[x]], %[[undef]][%[[zero]] : i32] : vector<16xi8>
-! MLIR: %[[y:.*]] = llvm.shufflevector %[[ins]], %[[undef]] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] : vector<16xi8>
-! MLIR: llvm.store %[[y]], %{{[0-9]}} : !llvm.ptr<vector<16xi8>>
 
 ! LLVMIR: %[[x:.*]] = load i8, ptr %{{[0-9]}}, align 1
 ! LLVMIR: %[[ins:.*]] = insertelement <16 x i8> undef, i8 %[[x]], i32 0
@@ -1319,17 +506,6 @@ subroutine vec_splats_testi16(x)
   integer(2) :: x
   vector(integer(2)) :: y
   y = vec_splats(x)
-! FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<i16>
-! FIR: %[[vy:.*]] = vector.splat %[[x]] : vector<8xi16>
-! FIR: %[[y:.*]] = fir.convert %[[vy]] : (vector<8xi16>) -> !fir.vector<8:i16>
-! FIR: fir.store %[[y]] to %{{[0-9]}} : !fir.ref<!fir.vector<8:i16>>
-
-! MLIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<i16>
-! MLIR: %[[undef:.*]] = llvm.mlir.undef : vector<8xi16>
-! MLIR: %[[zero:.*]] = llvm.mlir.constant(0 : i32) : i32
-! MLIR: %[[ins:.*]] = llvm.insertelement %[[x]], %[[undef]][%[[zero]] : i32] : vector<8xi16>
-! MLIR: %[[y:.*]] = llvm.shufflevector %[[ins]], %[[undef]] [0, 0, 0, 0, 0, 0, 0, 0] : vector<8xi16>
-! MLIR: llvm.store %[[y]], %{{[0-9]}} : !llvm.ptr<vector<8xi16>>
 
 ! LLVMIR: %[[x:.*]] = load i16, ptr %{{[0-9]}}, align 2
 ! LLVMIR: %[[ins:.*]] = insertelement <8 x i16> undef, i16 %[[x]], i32 0
@@ -1342,17 +518,6 @@ subroutine vec_splats_testi32(x)
   integer(4) :: x
   vector(integer(4)) :: y
   y = vec_splats(x)
-! FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<i32>
-! FIR: %[[vy:.*]] = vector.splat %[[x]] : vector<4xi32>
-! FIR: %[[y:.*]] = fir.convert %[[vy]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! FIR: fir.store %[[y]] to %{{[0-9]}} : !fir.ref<!fir.vector<4:i32>>
-
-! MLIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<i32>
-! MLIR: %[[undef:.*]] = llvm.mlir.undef : vector<4xi32>
-! MLIR: %[[zero:.*]] = llvm.mlir.constant(0 : i32) : i32
-! MLIR: %[[ins:.*]] = llvm.insertelement %[[x]], %[[undef]][%[[zero]] : i32] : vector<4xi32>
-! MLIR: %[[y:.*]] = llvm.shufflevector %[[ins]], %[[undef]] [0, 0, 0, 0] : vector<4xi32>
-! MLIR: llvm.store %[[y]], %{{[0-9]}} : !llvm.ptr<vector<4xi32>>
 
 ! LLVMIR: %[[x:.*]] = load i32, ptr %{{[0-9]}}, align 4
 ! LLVMIR: %[[ins:.*]] = insertelement <4 x i32> undef, i32 %[[x]], i32 0
@@ -1365,17 +530,6 @@ subroutine vec_splats_testi64(x)
   integer(8) :: x
   vector(integer(8)) :: y
   y = vec_splats(x)
-! FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<i64>
-! FIR: %[[vy:.*]] = vector.splat %[[x]] : vector<2xi64>
-! FIR: %[[y:.*]] = fir.convert %[[vy]] : (vector<2xi64>) -> !fir.vector<2:i64>
-! FIR: fir.store %[[y]] to %{{[0-9]}} : !fir.ref<!fir.vector<2:i64>>
-
-! MLIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<i64>
-! MLIR: %[[undef:.*]] = llvm.mlir.undef : vector<2xi64>
-! MLIR: %[[zero:.*]] = llvm.mlir.constant(0 : i32) : i32
-! MLIR: %[[ins:.*]] = llvm.insertelement %[[x]], %[[undef]][%[[zero]] : i32] : vector<2xi64>
-! MLIR: %[[y:.*]] = llvm.shufflevector %[[ins]], %[[undef]] [0, 0] : vector<2xi64>
-! MLIR: llvm.store %[[y]], %{{[0-9]}} : !llvm.ptr<vector<2xi64>>
 
 ! LLVMIR: %[[x:.*]] = load i64, ptr %{{[0-9]}}, align 8
 ! LLVMIR: %[[ins:.*]] = insertelement <2 x i64> undef, i64 %[[x]], i32 0
@@ -1388,17 +542,6 @@ subroutine vec_splats_testf32(x)
   real(4) :: x
   vector(real(4)) :: y
   y = vec_splats(x)
-! FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<f32>
-! FIR: %[[vy:.*]] = vector.splat %[[x]] : vector<4xf32>
-! FIR: %[[y:.*]] = fir.convert %[[vy]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! FIR: fir.store %[[y]] to %{{[0-9]}} : !fir.ref<!fir.vector<4:f32>>
-
-! MLIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<f32>
-! MLIR: %[[undef:.*]] = llvm.mlir.undef : vector<4xf32>
-! MLIR: %[[zero:.*]] = llvm.mlir.constant(0 : i32) : i32
-! MLIR: %[[ins:.*]] = llvm.insertelement %[[x]], %[[undef]][%[[zero]] : i32] : vector<4xf32>
-! MLIR: %[[y:.*]] = llvm.shufflevector %[[ins]], %[[undef]] [0, 0, 0, 0] : vector<4xf32>
-! MLIR: llvm.store %[[y]], %{{[0-9]}} : !llvm.ptr<vector<4xf32>>
 
 ! LLVMIR: %[[x:.*]] = load float, ptr %{{[0-9]}}, align 4
 ! LLVMIR: %[[ins:.*]] = insertelement <4 x float> undef, float %[[x]], i32 0
@@ -1411,17 +554,6 @@ subroutine vec_splats_testf64(x)
   real(8) :: x
   vector(real(8)) :: y
   y = vec_splats(x)
-! FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<f64>
-! FIR: %[[vy:.*]] = vector.splat %[[x]] : vector<2xf64>
-! FIR: %[[y:.*]] = fir.convert %[[vy]] : (vector<2xf64>) -> !fir.vector<2:f64>
-! FIR: fir.store %[[y]] to %{{[0-9]}} : !fir.ref<!fir.vector<2:f64>>
-
-! MLIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<f64>
-! MLIR: %[[undef:.*]] = llvm.mlir.undef : vector<2xf64>
-! MLIR: %[[zero:.*]] = llvm.mlir.constant(0 : i32) : i32
-! MLIR: %[[ins:.*]] = llvm.insertelement %[[x]], %[[undef]][%[[zero]] : i32] : vector<2xf64>
-! MLIR: %[[y:.*]] = llvm.shufflevector %[[ins]], %[[undef]] [0, 0] : vector<2xf64>
-! MLIR: llvm.store %[[y]], %{{[0-9]}} : !llvm.ptr<vector<2xf64>>
 
 ! LLVMIR: %[[x:.*]] = load double, ptr %{{[0-9]}}, align 8
 ! LLVMIR: %[[ins:.*]] = insertelement <2 x double> undef, double %[[x]], i32 0
@@ -1433,19 +565,6 @@ end subroutine vec_splats_testf64
 subroutine vec_splat_s32testi8()
   vector(integer(4)) :: y
   y = vec_splat_s32(7_1)
-! FIR: %[[val:.*]] = arith.constant 7 : i8
-! FIR: %[[cval:.*]] = fir.convert %[[val]] : (i8) -> i32
-! FIR: %[[vy:.*]] = vector.splat %[[cval]] : vector<4xi32>
-! FIR: %[[y:.*]] = fir.convert %[[vy]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! FIR: fir.store %[[y]] to %{{[0-9]}} : !fir.ref<!fir.vector<4:i32>>
-
-! MLIR: %[[val:.*]] = llvm.mlir.constant(7 : i8) : i8
-! MLIR: %[[cval:.*]] = llvm.sext %[[val]] : i8 to i32
-! MLIR: %[[und:.*]] = llvm.mlir.undef : vector<4xi32>
-! MLIR: %[[zero:.*]] = llvm.mlir.constant(0 : i32) : i32
-! MLIR: %[[ins:.*]] = llvm.insertelement %[[cval]], %[[und]][%[[zero]] : i32] : vector<4xi32>
-! MLIR: %[[y:.*]] = llvm.shufflevector %[[ins]], %[[und]] [0, 0, 0, 0] : vector<4xi32>
-! MLIR: llvm.store %[[y]], %{{[0-9]}} : !llvm.ptr<vector<4xi32>>
 
 ! LLVMIR: store <4 x i32> <i32 7, i32 7, i32 7, i32 7>, ptr %{{[0-9]}}, align 16
 end subroutine vec_splat_s32testi8
@@ -1454,19 +573,6 @@ end subroutine vec_splat_s32testi8
 subroutine vec_splat_s32testi16()
   vector(integer(4)) :: y
   y = vec_splat_s32(7_2)
-! FIR: %[[val:.*]] = arith.constant 7 : i16
-! FIR: %[[cval:.*]] = fir.convert %[[val]] : (i16) -> i32
-! FIR: %[[vy:.*]] = vector.splat %[[cval]] : vector<4xi32>
-! FIR: %[[y:.*]] = fir.convert %[[vy]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! FIR: fir.store %[[y]] to %{{[0-9]}} : !fir.ref<!fir.vector<4:i32>>
-
-! MLIR: %[[val:.*]] = llvm.mlir.constant(7 : i16) : i16
-! MLIR: %[[cval:.*]] = llvm.sext %[[val]] : i16 to i32
-! MLIR: %[[und:.*]] = llvm.mlir.undef : vector<4xi32>
-! MLIR: %[[zero:.*]] = llvm.mlir.constant(0 : i32) : i32
-! MLIR: %[[ins:.*]] = llvm.insertelement %[[cval]], %[[und]][%[[zero]] : i32] : vector<4xi32>
-! MLIR: %[[y:.*]] = llvm.shufflevector %[[ins]], %[[und]] [0, 0, 0, 0] : vector<4xi32>
-! MLIR: llvm.store %[[y]], %{{[0-9]}} : !llvm.ptr<vector<4xi32>>
 
 ! LLVMIR: store <4 x i32> <i32 7, i32 7, i32 7, i32 7>, ptr %{{[0-9]}}, align 16
 end subroutine vec_splat_s32testi16
@@ -1475,14 +581,6 @@ end subroutine vec_splat_s32testi16
 subroutine vec_splat_s32testi32()
   vector(integer(4)) :: y
   y = vec_splat_s32(7_4)
-! FIR: %[[val:.*]] = arith.constant 7 : i32
-! FIR: %[[vy:.*]] = vector.splat %[[val]] : vector<4xi32>
-! FIR: %[[y:.*]] = fir.convert %[[vy]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! FIR: fir.store %[[y]] to %{{[0-9]}} : !fir.ref<!fir.vector<4:i32>>
-
-! MLIR: %[[val:.*]] = llvm.mlir.constant(7 : i32) : i32
-! MLIR: %[[y:.*]] = llvm.mlir.constant(dense<7> : vector<4xi32>) : vector<4xi32>
-! MLIR: llvm.store %[[y]], %{{[0-9]}} : !llvm.ptr<vector<4xi32>>
 
 ! LLVMIR: store <4 x i32> <i32 7, i32 7, i32 7, i32 7>, ptr %{{[0-9]}}, align 16
 end subroutine vec_splat_s32testi32
@@ -1491,19 +589,6 @@ end subroutine vec_splat_s32testi32
 subroutine vec_splat_s32testi64()
   vector(integer(4)) :: y
   y = vec_splat_s32(7_8)
-! FIR: %[[val:.*]] = arith.constant 7 : i64
-! FIR: %[[cval:.*]] = fir.convert %[[val]] : (i64) -> i32
-! FIR: %[[vy:.*]] = vector.splat %[[cval]] : vector<4xi32>
-! FIR: %[[y:.*]] = fir.convert %[[vy]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! FIR: fir.store %[[y]] to %{{[0-9]}} : !fir.ref<!fir.vector<4:i32>>
-
-! MLIR: %[[val:.*]] = llvm.mlir.constant(7 : i64) : i64
-! MLIR: %[[cval:.*]] = llvm.trunc %[[val]] : i64 to i32
-! MLIR: %[[und:.*]] = llvm.mlir.undef : vector<4xi32>
-! MLIR: %[[zero:.*]] = llvm.mlir.constant(0 : i32) : i32
-! MLIR: %[[ins:.*]] = llvm.insertelement %[[cval]], %[[und]][%[[zero]] : i32] : vector<4xi32>
-! MLIR: %[[y:.*]] = llvm.shufflevector %[[ins]], %[[und]] [0, 0, 0, 0] : vector<4xi32>
-! MLIR: llvm.store %[[y]], %{{[0-9]}} : !llvm.ptr<vector<4xi32>>
 
 ! LLVMIR: store <4 x i32> <i32 7, i32 7, i32 7, i32 7>, ptr %{{[0-9]}}, align 16
 end subroutine vec_splat_s32testi64
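
For reference, every splat test above checks the same final LLVM IR idiom on the HLFIR path: the scalar is extracted (vec_splat) or loaded (vec_splats), inserted into lane 0 of an undef vector, and broadcast by a shufflevector with an all-zero mask. A minimal sketch in the style of these tests; the subroutine name is illustrative and not part of this patch, and the spelling of the zero mask as zeroinitializer is an assumption that may vary across LLVM versions:

! RUN: %flang_fc1 -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes="LLVMIR" %s
! REQUIRES: target=powerpc{{.*}}
subroutine splat_sketch(x)
  vector(integer(4)) :: x, y
  y = vec_splat(x, 0_4)
! LLVMIR: %[[x:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
! LLVMIR: %[[ele:.*]] = extractelement <4 x i32> %[[x]], i32 0
! LLVMIR: %[[ins:.*]] = insertelement <4 x i32> undef, i32 %[[ele]], i32 0
! LLVMIR: %[[y:.*]] = shufflevector <4 x i32> %[[ins]], <4 x i32> undef, <4 x i32> zeroinitializer
! LLVMIR: store <4 x i32> %[[y]], ptr %{{[0-9]}}, align 16
end subroutine splat_sketch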

diff --git a/flang/test/Lower/PowerPC/ppc-vec-store-elem-order.f90 b/flang/test/Lower/PowerPC/ppc-vec-store-elem-order.f90
index 387033eadabac96..494ed21f4fe928b 100644
--- a/flang/test/Lower/PowerPC/ppc-vec-store-elem-order.f90
+++ b/flang/test/Lower/PowerPC/ppc-vec-store-elem-order.f90
@@ -1,5 +1,4 @@
-! RUN: %flang_fc1 -emit-fir %s -fno-ppc-native-vector-element-order -triple ppc64le-unknown-linux -o - | FileCheck --check-prefixes="FIR" %s
-! RUN: %flang_fc1 -emit-llvm %s -fno-ppc-native-vector-element-order -triple ppc64le-unknown-linux -o - | FileCheck --check-prefixes="LLVMIR" %s
+! RUN: %flang_fc1 -flang-experimental-hlfir -emit-llvm %s -fno-ppc-native-vector-element-order -triple ppc64le-unknown-linux -o - | FileCheck --check-prefixes="LLVMIR" %s
 ! REQUIRES: target=powerpc{{.*}}
 
 !----------------------
@@ -12,16 +11,6 @@ subroutine vec_st_test(arg1, arg2, arg3)
   vector(integer(2)) :: arg3
   call vec_st(arg1, arg2, arg3)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<8:i16>>
-! FIR: %[[arg2:.*]] = fir.load %arg1 : !fir.ref<i32>
-! FIR: %[[arg3:.*]] = fir.convert %arg2 : (!fir.ref<!fir.vector<8:i16>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! FIR: %[[bc:.*]] = vector.bitcast %[[varg1]] : vector<8xi16> to vector<4xi32>
-! FIR: %[[ordr:.*]] = fir.undefined vector<4xi32>
-! FIR: %[[shf:.*]] = vector.shuffle %[[bc]], %[[ordr]] [3, 2, 1, 0] : vector<4xi32>, vector<4xi32>
-! FIR: fir.call @llvm.ppc.altivec.stvx(%[[shf]], %[[addr]]) fastmath<contract> : (vector<4xi32>, !fir.ref<!fir.array<?xi8>>) -> ()
-
 ! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %0, align 16
 ! LLVMIR: %[[arg2:.*]] = load i32, ptr %1, align 4
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %2, i32 %[[arg2]]
@@ -40,16 +29,6 @@ subroutine vec_ste_test(arg1, arg2, arg3)
   real(4) :: arg3
   call vec_ste(arg1, arg2, arg3)
   
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:f32>>
-! FIR: %[[arg2:.*]] = fir.load %arg1 : !fir.ref<i32>
-! FIR: %[[arg3:.*]] = fir.convert %arg2 : (!fir.ref<f32>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! FIR: %[[bc:.*]] = vector.bitcast %[[varg1]] : vector<4xf32> to vector<4xi32>
-! FIR: %[[ordr:.*]] = fir.undefined vector<4xi32>
-! FIR: %[[shf:.*]] = vector.shuffle %[[bc]], %[[ordr]] [3, 2, 1, 0] : vector<4xi32>, vector<4xi32>
-! FIR: fir.call @llvm.ppc.altivec.stvewx(%[[shf]], %[[addr]]) fastmath<contract> : (vector<4xi32>, !fir.ref<!fir.array<?xi8>>) -> ()
-
 ! LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %0, align 16
 ! LLVMIR: %[[arg2:.*]] = load i32, ptr %1, align 4
 ! LLVMIR: %[[addr]] = getelementptr i8, ptr %2, i32 %[[arg2]]
@@ -68,22 +47,11 @@ subroutine vec_xst_test(arg1, arg2, arg3)
   vector(integer(4)) :: arg3
   call vec_xst(arg1, arg2, arg3)
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:i32>>
-! FIR: %[[arg2:.*]] = fir.load %arg1 : !fir.ref<i32>
-! FIR: %[[arg3:.*]] = fir.convert %arg2 : (!fir.ref<!fir.vector<4:i32>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref<!fir.array<?xi8>>) -> !fir.ref<!fir.vector<4:i32>>
-! FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! FIR: %[[ordr:.*]] = fir.undefined vector<4xi32>
-! FIR: %[[shf:.*]] = vector.shuffle %[[varg1]], %[[ordr]] [3, 2, 1, 0] : vector<4xi32>, vector<4xi32>
-! FIR: %[[src:.*]] = fir.convert %[[shf]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! FIR: fir.store %[[src]] to %[[trg]] {alignment = 1 : i64} : !fir.ref<!fir.vector<4:i32>>
-
 ! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %0, align 16
 ! LLVMIR: %[[arg2:.*]] = load i32, ptr %1, align 4
 ! LLVMIR: %[[trg:.*]] = getelementptr i8, ptr %2, i32 %[[arg2]]
 ! LLVMIR: %[[src:.*]] = shufflevector <4 x i32> %[[arg1]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-! LLVMIR:  store <4 x i32> %[[src]], ptr %[[trg]], align 1
+! LLVMIR:  store <4 x i32> %[[src]], ptr %[[trg]], align 16
 end subroutine vec_xst_test
 
 !----------------------
@@ -97,33 +65,19 @@ subroutine vec_xstd2_test(arg1, arg2, arg3, i)
   integer(4) :: i
   call vec_xstd2(arg1, arg2, arg3(i))
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:f32>>
-! FIR: %[[arg2:.*]] = fir.load %arg1 : !fir.ref<i16>
-! FIR: %[[arg4:.*]] = fir.load %arg3 : !fir.ref<i32>
-! FIR: %[[arg4_64:.*]] = fir.convert %[[arg4]] : (i32) -> i64
-! FIR: %[[one:.*]] = arith.constant 1 : i64
-! FIR: %[[idx:.*]] = arith.subi %[[arg4_64]], %[[one]] : i64
-! FIR: %[[elemaddr:.*]] = fir.coordinate_of %arg2, %[[idx]] : (!fir.ref<!fir.array<?x!fir.vector<4:f32>>>, i64) -> !fir.ref<!fir.vector<4:f32>>
-! FIR: %[[elemptr:.*]] = fir.convert %[[elemaddr]] : (!fir.ref<!fir.vector<4:f32>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[elemptr]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! FIR: %[[v2elem:.*]] = vector.bitcast %[[varg1]] : vector<4xf32> to vector<2xi64>
-! FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref<!fir.array<?xi8>>) -> !fir.ref<!fir.vector<2:i64>>
-! FIR: %[[undef:.*]] = fir.undefined vector<2xi64>
-! FIR: %[[shf:.*]] = vector.shuffle %[[v2elem]], %[[undef]] [1, 0] : vector<2xi64>, vector<2xi64>
-! FIR: %[[src:.*]] = fir.convert %[[shf]] : (vector<2xi64>) -> !fir.vector<2:i64>
-! FIR: fir.store %[[src]] to %[[trg]] {alignment = 1 : i64} : !fir.ref<!fir.vector<2:i64>>
-
+! LLVMIR: %[[i:.*]] = load i32, ptr %3, align 4
+! LLVMIR: %[[iext:.*]] = sext i32 %[[i]] to i64
+! LLVMIR: %[[isub:.*]] = sub i64 %[[iext]], 1
+! LLVMIR: %[[imul1:.*]] = mul i64 %[[isub]], 1
+! LLVMIR: %[[imul2:.*]] = mul i64 %[[imul1]], 1
+! LLVMIR: %[[iadd:.*]] = add i64 %[[imul2]], 0
+! LLVMIR: %[[gep1:.*]] = getelementptr <4 x float>, ptr %2, i64 %[[iadd]]
 ! LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %0, align 16
 ! LLVMIR: %[[arg2:.*]] = load i16, ptr %1, align 2
-! LLVMIR: %[[arg4:.*]] = load i32, ptr %3, align 4
-! LLVMIR: %[[arg4_64:.*]] = sext i32 %[[arg4]] to i64
-! LLVMIR: %[[idx:.*]] = sub i64 %[[arg4_64]], 1
-! LLVMIR: %[[elemptr:.*]] = getelementptr <4 x float>, ptr %2, i64 %[[idx]]
-! LLVMIR: %[[trg:.*]] = getelementptr i8, ptr %[[elemptr]], i16 %[[arg2]]
-! LLVMIR: %[[v2elem:.*]] = bitcast <4 x float> %[[arg1]] to <2 x i64>
-! LLVMIR: %[[src:.*]] = shufflevector <2 x i64> %[[v2elem]], <2 x i64> undef, <2 x i32> <i32 1, i32 0>
-! LLVMIR: store <2 x i64> %[[src]], ptr %[[trg]], align 1
+! LLVMIR: %[[gep2:.*]] = getelementptr i8, ptr %[[gep1]], i16 %[[arg2]]
+! LLVMIR: %[[src:.*]] = bitcast <4 x float> %[[arg1]] to <2 x i64>
+! LLVMIR: %[[shf:.*]] = shufflevector <2 x i64> %[[src]], <2 x i64> undef, <2 x i32> <i32 1, i32 0>
+! LLVMIR: store <2 x i64> %[[shf]], ptr %[[gep2]], align 16
 end subroutine vec_xstd2_test
 
 !----------------------
@@ -137,29 +91,16 @@ subroutine vec_xstw4_test(arg1, arg2, arg3, i)
   integer(4) :: i
   call vec_xstw4(arg1, arg2, arg3(i))
 
-! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:f32>>
-! FIR: %[[arg2:.*]] = fir.load %arg1 : !fir.ref<i16>
-! FIR: %[[arg4:.*]] = fir.load %arg3 : !fir.ref<i32>
-! FIR: %[[arg4_64:.*]] = fir.convert %[[arg4]] : (i32) -> i64
-! FIR: %[[one:.*]] = arith.constant 1 : i64
-! FIR: %[[idx:.*]] = arith.subi %[[arg4_64]], %[[one]] : i64
-! FIR: %[[elemaddr:.*]] = fir.coordinate_of %arg2, %[[idx]] : (!fir.ref<!fir.array<?x!fir.vector<4:f32>>>, i64) -> !fir.ref<!fir.vector<4:f32>>
-! FIR: %[[elemptr:.*]] = fir.convert %[[elemaddr]] : (!fir.ref<!fir.vector<4:f32>>) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[addr:.*]] = fir.coordinate_of %[[elemptr]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
-! FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref<!fir.array<?xi8>>) -> !fir.ref<!fir.vector<4:f32>>
-! FIR: %[[undef:.*]] = fir.undefined vector<4xf32>
-! FIR: %[[shf:.*]] = vector.shuffle %[[varg1]], %[[undef]] [3, 2, 1, 0] : vector<4xf32>, vector<4xf32>
-! FIR: %[[src:.*]] = fir.convert %[[shf]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! FIR: fir.store %[[src]] to %[[trg]] {alignment = 1 : i64} : !fir.ref<!fir.vector<4:f32>>
-
+! LLVMIR: %[[i:.*]] = load i32, ptr %3, align 4
+! LLVMIR: %[[iext:.*]] = sext i32 %[[i]] to i64
+! LLVMIR: %[[isub:.*]] = sub i64 %[[iext]], 1
+! LLVMIR: %[[imul1:.*]] = mul i64 %[[isub]], 1
+! LLVMIR: %[[imul2:.*]] = mul i64 %[[imul1]], 1
+! LLVMIR: %[[iadd:.*]] = add i64 %[[imul2]], 0
+! LLVMIR: %[[gep1:.*]] = getelementptr <4 x float>, ptr %2, i64 %[[iadd]]
 ! LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %0, align 16
 ! LLVMIR: %[[arg2:.*]] = load i16, ptr %1, align 2
-! LLVMIR: %[[arg4:.*]] = load i32, ptr %3, align 4
-! LLVMIR: %[[arg4_64:.*]] = sext i32 %[[arg4]] to i64
-! LLVMIR: %[[idx:.*]] = sub i64 %[[arg4_64]], 1
-! LLVMIR: %[[elemptr:.*]] = getelementptr <4 x float>, ptr %2, i64 %[[idx]]
-! LLVMIR: %[[trg:.*]] = getelementptr i8, ptr %[[elemptr]], i16 %[[arg2]]
+! LLVMIR: %[[gep2:.*]] = getelementptr i8, ptr %[[gep1]], i16 %[[arg2]]
 ! LLVMIR: %[[src:.*]] = shufflevector <4 x float> %[[arg1]], <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-! LLVMIR: store <4 x float> %[[src]], ptr %[[trg]], align 1
+! LLVMIR: store <4 x float> %[[src]], ptr %[[gep2]], align 16
 end subroutine vec_xstw4_test
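
A note on the new HLFIR-path checks in this file: the sext/sub/mul/mul/add chain feeding the first getelementptr is the generic rank-1 subscript computation for arg3(i) that HLFIR emits and does not fold at lowering time, i.e. offset = (i - 1) * 1 * 1 + 0, where the lower bound is 1 and both stride terms are 1 for a contiguous dummy. %[[gep1]] therefore addresses arg3(i), %[[gep2]] applies the byte displacement from arg2, and the trailing store now carries the vector's natural 16-byte alignment instead of the previous align 1, one of the non-functional LLVM IR differences this patch updates the checks for.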

diff --git a/flang/test/Lower/PowerPC/ppc-vec-store.f90 b/flang/test/Lower/PowerPC/ppc-vec-store.f90
index 8aa452d41105255..8e20228d6825989 100644
--- a/flang/test/Lower/PowerPC/ppc-vec-store.f90
+++ b/flang/test/Lower/PowerPC/ppc-vec-store.f90
@@ -1,6 +1,4 @@
-! RUN: bbc -emit-fir %s -o - | FileCheck --check-prefixes="CHECK-FIR" %s
-! RUN: %flang_fc1 -emit-fir %s -o - | fir-opt --fir-to-llvm-ir | FileCheck --check-prefixes="CHECK-LLVMIR" %s
-! RUN: %flang_fc1 -emit-llvm %s -o - | FileCheck --check-prefixes="CHECK" %s
+! RUN: %flang_fc1 -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes="LLVMIR" %s
 ! REQUIRES: target=powerpc{{.*}}
 
 !----------------------
@@ -13,26 +11,11 @@ subroutine vec_st_vi1i2vi1(arg1, arg2, arg3)
   integer(2) :: arg2
   call vec_st(arg1, arg2, arg3)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<i16>
-! CHECK-FIR: %[[arg3:.*]] = fir.convert %{{.*}} : (!fir.ref<!fir.vector<16:i8>>) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[cnvArg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[bcArg1:.*]] = vector.bitcast %[[cnvArg1]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: fir.call @llvm.ppc.altivec.stvx(%[[bcArg1]], %[[addr]]) fastmath<contract> : (vector<4xi32>, !fir.ref<!fir.array<?xi8>>) -> ()
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<i16>
-! CHECK-LLVMIR: %[[arg3:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<vector<16xi8>> to !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3]][%[[arg2]]] : (!llvm.ptr<i8>, i16) -> !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[bcArg1:.*]] = llvm.bitcast %[[arg1]] : vector<16xi8> to vector<4xi32>
-! CHECK-LLVMIR: llvm.call @llvm.ppc.altivec.stvx(%[[bcArg1]], %[[addr]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, !llvm.ptr<i8>) -> ()
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load i16, ptr %{{.*}}, align 2
-! CHECK: %[[arg3:.*]] = getelementptr i8, ptr %{{.*}}, i16 %5
-! CHECK: %[[bcArg1:.*]] = bitcast <16 x i8> %[[arg1]] to <4 x i32>
-! CHECK: call void @llvm.ppc.altivec.stvx(<4 x i32> %[[bcArg1]], ptr %[[arg3]])
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load i16, ptr %{{.*}}, align 2
+! LLVMIR: %[[arg3:.*]] = getelementptr i8, ptr %{{.*}}, i16 %5
+! LLVMIR: %[[bcArg1:.*]] = bitcast <16 x i8> %[[arg1]] to <4 x i32>
+! LLVMIR: call void @llvm.ppc.altivec.stvx(<4 x i32> %[[bcArg1]], ptr %[[arg3]])
 end subroutine vec_st_vi1i2vi1
 
 ! CHECK-LABEL: vec_st_vi2i2vi2
@@ -41,26 +24,11 @@ subroutine vec_st_vi2i2vi2(arg1, arg2, arg3)
   integer(2) :: arg2
   call vec_st(arg1, arg2, arg3)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<i16>
-! CHECK-FIR: %[[arg3:.*]] = fir.convert %{{.*}} : (!fir.ref<!fir.vector<8:i16>>) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[cnvArg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[bcArg1:.*]] = vector.bitcast %[[cnvArg1]] : vector<8xi16> to vector<4xi32>
-! CHECK-FIR: fir.call @llvm.ppc.altivec.stvx(%[[bcArg1]], %[[addr]]) fastmath<contract> : (vector<4xi32>, !fir.ref<!fir.array<?xi8>>) -> ()
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<i16>
-! CHECK-LLVMIR: %[[arg3:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<vector<8xi16>> to !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3]][%[[arg2]]] : (!llvm.ptr<i8>, i16) -> !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[bcArg1:.*]] = llvm.bitcast %[[arg1]] : vector<8xi16> to vector<4xi32>
-! CHECK-LLVMIR: llvm.call @llvm.ppc.altivec.stvx(%[[bcArg1]], %[[addr]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, !llvm.ptr<i8>) -> ()
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load i16, ptr %{{.*}}, align 2
-! CHECK: %[[arg3:.*]] = getelementptr i8, ptr %{{.*}}, i16 %5
-! CHECK: %[[bcArg1:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32>
-! CHECK: call void @llvm.ppc.altivec.stvx(<4 x i32> %[[bcArg1]], ptr %[[arg3]])
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load i16, ptr %{{.*}}, align 2
+! LLVMIR: %[[arg3:.*]] = getelementptr i8, ptr %{{.*}}, i16 %5
+! LLVMIR: %[[bcArg1:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32>
+! LLVMIR: call void @llvm.ppc.altivec.stvx(<4 x i32> %[[bcArg1]], ptr %[[arg3]])
 end subroutine vec_st_vi2i2vi2
 
 ! CHECK-LABEL: vec_st_vi4i2vi4
@@ -69,23 +37,10 @@ subroutine vec_st_vi4i2vi4(arg1, arg2, arg3)
   integer(2) :: arg2
   call vec_st(arg1, arg2, arg3)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<i16>
-! CHECK-FIR: %[[arg3:.*]] = fir.convert %{{.*}} : (!fir.ref<!fir.vector<4:i32>>) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: fir.call @llvm.ppc.altivec.stvx(%[[varg1]], %[[addr]]) fastmath<contract> : (vector<4xi32>, !fir.ref<!fir.array<?xi8>>) -> ()
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<i16>
-! CHECK-LLVMIR: %[[arg3:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<vector<4xi32>> to !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3]][%[[arg2]]] : (!llvm.ptr<i8>, i16) -> !llvm.ptr<i8>
-! CHECK-LLVMIR: llvm.call @llvm.ppc.altivec.stvx(%[[arg1]], %[[addr]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, !llvm.ptr<i8>) -> ()
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load i16, ptr %{{.*}}, align 2
-! CHECK: %[[arg3:.*]] = getelementptr i8, ptr %{{.*}}, i16 %5
-! CHECK: call void @llvm.ppc.altivec.stvx(<4 x i32> %[[arg1]], ptr %[[arg3]])
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load i16, ptr %{{.*}}, align 2
+! LLVMIR: %[[arg3:.*]] = getelementptr i8, ptr %{{.*}}, i16 %5
+! LLVMIR: call void @llvm.ppc.altivec.stvx(<4 x i32> %[[arg1]], ptr %[[arg3]])
 end subroutine vec_st_vi4i2vi4
 
 ! CHECK-LABEL: vec_st_vu1i4vu1
@@ -94,26 +49,11 @@ subroutine vec_st_vu1i4vu1(arg1, arg2, arg3)
   integer(4) :: arg2
   call vec_st(arg1, arg2, arg3)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<i32>
-! CHECK-FIR: %[[arg3:.*]] = fir.convert %{{.*}} : (!fir.ref<!fir.vector<16:ui8>>) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[cnvArg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[bcArg1:.*]] = vector.bitcast %[[cnvArg1]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: fir.call @llvm.ppc.altivec.stvx(%[[bcArg1]], %[[addr]]) fastmath<contract> : (vector<4xi32>, !fir.ref<!fir.array<?xi8>>) -> ()
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<i32>
-! CHECK-LLVMIR: %[[arg3:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<vector<16xi8>> to !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3]][%[[arg2]]] : (!llvm.ptr<i8>, i32) -> !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[bcArg1:.*]] = llvm.bitcast %[[arg1]] : vector<16xi8> to vector<4xi32>
-! CHECK-LLVMIR: llvm.call @llvm.ppc.altivec.stvx(%[[bcArg1]], %[[addr]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, !llvm.ptr<i8>) -> ()
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load i32, ptr %{{.*}}, align 4
-! CHECK: %[[arg3:.*]] = getelementptr i8, ptr %{{.*}}, i32 %5
-! CHECK: %[[bcArg1:.*]] = bitcast <16 x i8> %[[arg1]] to <4 x i32>
-! CHECK: call void @llvm.ppc.altivec.stvx(<4 x i32> %[[bcArg1]], ptr %[[arg3]])
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load i32, ptr %{{.*}}, align 4
+! LLVMIR: %[[arg3:.*]] = getelementptr i8, ptr %{{.*}}, i32 %5
+! LLVMIR: %[[bcArg1:.*]] = bitcast <16 x i8> %[[arg1]] to <4 x i32>
+! LLVMIR: call void @llvm.ppc.altivec.stvx(<4 x i32> %[[bcArg1]], ptr %[[arg3]])
 end subroutine vec_st_vu1i4vu1
 
 ! CHECK-LABEL: vec_st_vu2i4vu2
@@ -122,26 +62,11 @@ subroutine vec_st_vu2i4vu2(arg1, arg2, arg3)
   integer(4) :: arg2
   call vec_st(arg1, arg2, arg3)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<i32>
-! CHECK-FIR: %[[arg3:.*]] = fir.convert %{{.*}} : (!fir.ref<!fir.vector<8:ui16>>) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[cnvArg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[bcArg1:.*]] = vector.bitcast %[[cnvArg1]] : vector<8xi16> to vector<4xi32>
-! CHECK-FIR: fir.call @llvm.ppc.altivec.stvx(%[[bcArg1]], %[[addr]]) fastmath<contract> : (vector<4xi32>, !fir.ref<!fir.array<?xi8>>) -> ()
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<i32>
-! CHECK-LLVMIR: %[[arg3:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<vector<8xi16>> to !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3]][%[[arg2]]] : (!llvm.ptr<i8>, i32) -> !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[bcArg1:.*]] = llvm.bitcast %[[arg1]] : vector<8xi16> to vector<4xi32>
-! CHECK-LLVMIR: llvm.call @llvm.ppc.altivec.stvx(%[[bcArg1]], %[[addr]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, !llvm.ptr<i8>) -> ()
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load i32, ptr %{{.*}}, align 4
-! CHECK: %[[arg3:.*]] = getelementptr i8, ptr %{{.*}}, i32 %5
-! CHECK: %[[bcArg1:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32>
-! CHECK: call void @llvm.ppc.altivec.stvx(<4 x i32> %[[bcArg1]], ptr %[[arg3]])
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load i32, ptr %{{.*}}, align 4
+! LLVMIR: %[[arg3:.*]] = getelementptr i8, ptr %{{.*}}, i32 %5
+! LLVMIR: %[[bcArg1:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32>
+! LLVMIR: call void @llvm.ppc.altivec.stvx(<4 x i32> %[[bcArg1]], ptr %[[arg3]])
 end subroutine vec_st_vu2i4vu2
 
 ! CHECK-LABEL: vec_st_vu4i4vu4
@@ -150,23 +75,10 @@ subroutine vec_st_vu4i4vu4(arg1, arg2, arg3)
   integer(4) :: arg2
   call vec_st(arg1, arg2, arg3)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<i32>
-! CHECK-FIR: %[[arg3:.*]] = fir.convert %{{.*}} : (!fir.ref<!fir.vector<4:ui32>>) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: fir.call @llvm.ppc.altivec.stvx(%[[varg1]], %[[addr]]) fastmath<contract> : (vector<4xi32>, !fir.ref<!fir.array<?xi8>>) -> ()
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<i32>
-! CHECK-LLVMIR: %[[arg3:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<vector<4xi32>> to !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3]][%[[arg2]]] : (!llvm.ptr<i8>, i32) -> !llvm.ptr<i8>
-! CHECK-LLVMIR: llvm.call @llvm.ppc.altivec.stvx(%[[arg1]], %[[addr]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, !llvm.ptr<i8>) -> ()
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load i32, ptr %{{.*}}, align 4
-! CHECK: %[[arg3:.*]] = getelementptr i8, ptr %{{.*}}, i32 %5
-! CHECK: call void @llvm.ppc.altivec.stvx(<4 x i32> %[[arg1]], ptr %[[arg3]])
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load i32, ptr %{{.*}}, align 4
+! LLVMIR: %[[arg3:.*]] = getelementptr i8, ptr %{{.*}}, i32 %5
+! LLVMIR: call void @llvm.ppc.altivec.stvx(<4 x i32> %[[arg1]], ptr %[[arg3]])
 end subroutine vec_st_vu4i4vu4
 
 ! CHECK-LABEL: vec_st_vi4i4via4
@@ -175,37 +87,17 @@ subroutine vec_st_vi4i4via4(arg1, arg2, arg3, i)
   integer(4) :: arg2, i
   call vec_st(arg1, arg2, arg3(i))
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<i32>
-! CHECK-FIR: %[[idx:.*]] = fir.load %{{.*}} : !fir.ref<i32>
-! CHECK-FIR: %[[idx64:.*]] = fir.convert %[[idx]] : (i32) -> i64
-! CHECK-FIR: %[[cnst:.*]] = arith.constant 1 : i64
-! CHECK-FIR: %[[cnstm1:.*]] = arith.subi %[[idx64]], %[[cnst]] : i64
-! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %arg2, %[[cnstm1]] : (!fir.ref<!fir.array<5x!fir.vector<4:i32>>>, i64) -> !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[ref:.*]] = fir.convert %[[addr]] : (!fir.ref<!fir.vector<4:i32>>) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[pos:.*]] = fir.coordinate_of %[[ref]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: fir.call @llvm.ppc.altivec.stvx(%[[varg1]], %[[pos]]) fastmath<contract> : (vector<4xi32>, !fir.ref<!fir.array<?xi8>>) -> ()
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr<i32>
-! CHECK-LLVMIR: %[[idx:.*]] = llvm.load %arg3 : !llvm.ptr<i32>
-! CHECK-LLVMIR: %[[idx64:.*]] = llvm.sext %[[idx]] : i32 to i64
-! CHECK-LLVMIR: %[[one:.*]] = llvm.mlir.constant(1 : i64) : i64
-! CHECK-LLVMIR: %[[idx64m1:.*]] = llvm.sub %[[idx64]], %[[one]] : i64
-! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %arg2[0, %[[idx64m1]]] : (!llvm.ptr<array<5 x vector<4xi32>>>, i64) -> !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[bc:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr<vector<4xi32>> to !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[pos:.*]] = llvm.getelementptr %[[bc]][%[[arg2]]] : (!llvm.ptr<i8>, i32) -> !llvm.ptr<i8>
-! CHECK-LLVMIR: llvm.call @llvm.ppc.altivec.stvx(%[[arg1]], %[[pos]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, !llvm.ptr<i8>) -> ()
-
-! CHECK:  %5 = load <4 x i32>, ptr %0, align 16
-! CHECK:  %6 = load i32, ptr %1, align 4
-! CHECK:  %7 = load i32, ptr %3, align 4
-! CHECK:  %8 = sext i32 %7 to i64
-! CHECK:  %9 = sub i64 %8, 1
-! CHECK:  %10 = getelementptr [5 x <4 x i32>], ptr %2, i32 0, i64 %9
-! CHECK:  %11 = getelementptr i8, ptr %10, i32 %6
-! CHECK:  call void @llvm.ppc.altivec.stvx(<4 x i32> %5, ptr %11)
+! LLVMIR: %[[i:.*]] = load i32, ptr %3, align 4
+! LLVMIR: %[[iext:.*]] = sext i32 %[[i]] to i64
+! LLVMIR: %[[iextsub:.*]] = sub i64 %[[iext]], 1
+! LLVMIR: %[[iextmul:.*]] = mul i64 %[[iextsub]], 1
+! LLVMIR: %[[iextmul2:.*]] = mul i64 %[[iextmul]], 1
+! LLVMIR: %[[iextadd:.*]] = add i64 %[[iextmul2]], 0
+! LLVMIR: %[[gep1:.*]] = getelementptr <4 x i32>, ptr %2, i64 %[[iextadd]]
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %0, align 16
+! LLVMIR: %[[arg2:.*]] = load i32, ptr %1, align 4
+! LLVMIR: %[[gep2:.*]] = getelementptr i8, ptr %[[gep1]], i32 %[[arg2]]
+! LLVMIR: call void @llvm.ppc.altivec.stvx(<4 x i32> %[[arg1]], ptr %[[gep2]])
 end subroutine vec_st_vi4i4via4
 
 !----------------------
@@ -219,22 +111,10 @@ subroutine vec_ste_vi1i2i1(arg1, arg2, arg3)
   integer(1) :: arg3
   call vec_ste(arg1, arg2, arg3)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<i16>
-! CHECK-FIR: %[[arg3:.*]] = fir.convert %{{.*}} : (!fir.ref<i8>) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[cnvArg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: fir.call @llvm.ppc.altivec.stvebx(%[[cnvArg1]], %[[addr]]) fastmath<contract> : (vector<16xi8>, !fir.ref<!fir.array<?xi8>>) -> ()
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<i16>
-! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3:.*]][%[[arg2]]] : (!llvm.ptr<i8>, i16) -> !llvm.ptr<i8>
-! CHECK-LLVMIR: llvm.call @llvm.ppc.altivec.stvebx(%[[arg1]], %[[addr]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<16xi8>, !llvm.ptr<i8>) -> ()
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load i16, ptr %{{.*}}, align 2
-! CHECK: %[[arg3:.*]] = getelementptr i8, ptr %{{.*}}, i16 %5
-! CHECK: call void @llvm.ppc.altivec.stvebx(<16 x i8> %[[arg1]], ptr %[[arg3]])
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load i16, ptr %{{.*}}, align 2
+! LLVMIR: %[[arg3:.*]] = getelementptr i8, ptr %{{.*}}, i16 %5
+! LLVMIR: call void @llvm.ppc.altivec.stvebx(<16 x i8> %[[arg1]], ptr %[[arg3]])
 end subroutine vec_ste_vi1i2i1
 
 ! CHECK-LABEL: vec_ste_vi2i2i2
@@ -244,23 +124,10 @@ subroutine vec_ste_vi2i2i2(arg1, arg2, arg3)
   integer(2) :: arg3
   call vec_ste(arg1, arg2, arg3)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<i16>
-! CHECK-FIR: %[[arg3:.*]] = fir.convert %{{.*}} : (!fir.ref<i16>) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[cnvArg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: fir.call @llvm.ppc.altivec.stvehx(%[[cnvArg1]], %[[addr]]) fastmath<contract> : (vector<8xi16>, !fir.ref<!fir.array<?xi8>>) -> ()
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<i16>
-! CHECK-LLVMIR: %[[arg3:.*]] = llvm.bitcast %arg2 : !llvm.ptr<i16> to !llvm.ptr<i8> 
-! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3]][%[[arg2]]] : (!llvm.ptr<i8>, i16) -> !llvm.ptr<i8>
-! CHECK-LLVMIR: llvm.call @llvm.ppc.altivec.stvehx(%[[arg1]], %[[addr]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<8xi16>, !llvm.ptr<i8>) -> ()
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load i16, ptr %{{.*}}, align 2
-! CHECK: %[[arg3:.*]] = getelementptr i8, ptr %{{.*}}, i16 %5
-! CHECK: call void @llvm.ppc.altivec.stvehx(<8 x i16> %[[arg1]], ptr %[[arg3]])
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load i16, ptr %{{.*}}, align 2
+! LLVMIR: %[[arg3:.*]] = getelementptr i8, ptr %{{.*}}, i16 %5
+! LLVMIR: call void @llvm.ppc.altivec.stvehx(<8 x i16> %[[arg1]], ptr %[[arg3]])
 end subroutine vec_ste_vi2i2i2
 
 ! CHECK-LABEL: vec_ste_vi4i2i4
@@ -270,23 +137,10 @@ subroutine vec_ste_vi4i2i4(arg1, arg2, arg3)
   integer(4) :: arg3
   call vec_ste(arg1, arg2, arg3)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<i16>
-! CHECK-FIR: %[[arg3:.*]] = fir.convert %{{.*}} : (!fir.ref<i32>) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: fir.call @llvm.ppc.altivec.stvewx(%[[varg1]], %[[addr]]) fastmath<contract> : (vector<4xi32>, !fir.ref<!fir.array<?xi8>>) -> ()
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<i16>
-! CHECK-LLVMIR: %[[arg3:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<i32> to !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3]][%[[arg2]]] : (!llvm.ptr<i8>, i16) -> !llvm.ptr<i8>
-! CHECK-LLVMIR: llvm.call @llvm.ppc.altivec.stvewx(%[[arg1]], %[[addr]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, !llvm.ptr<i8>) -> ()
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load i16, ptr %{{.*}}, align 2
-! CHECK: %[[arg3:.*]] = getelementptr i8, ptr %{{.*}}, i16 %5
-! CHECK: call void @llvm.ppc.altivec.stvewx(<4 x i32> %[[arg1]], ptr %[[arg3]])
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load i16, ptr %{{.*}}, align 2
+! LLVMIR: %[[arg3:.*]] = getelementptr i8, ptr %{{.*}}, i16 %5
+! LLVMIR: call void @llvm.ppc.altivec.stvewx(<4 x i32> %[[arg1]], ptr %[[arg3]])
 end subroutine vec_ste_vi4i2i4
 
 ! CHECK-LABEL: vec_ste_vu1i4u1
@@ -296,22 +150,10 @@ subroutine vec_ste_vu1i4u1(arg1, arg2, arg3)
   integer(1) :: arg3
   call vec_ste(arg1, arg2, arg3)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<i32>
-! CHECK-FIR: %[[arg3:.*]] = fir.convert %{{.*}} : (!fir.ref<i8>) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[cnvArg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: fir.call @llvm.ppc.altivec.stvebx(%[[cnvArg1]], %[[addr]]) fastmath<contract> : (vector<16xi8>, !fir.ref<!fir.array<?xi8>>) -> ()
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<i32>
-! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3:.*]][%[[arg2]]] : (!llvm.ptr<i8>, i32) -> !llvm.ptr<i8>
-! CHECK-LLVMIR: llvm.call @llvm.ppc.altivec.stvebx(%[[arg1]], %[[addr]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<16xi8>, !llvm.ptr<i8>) -> ()
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load i32, ptr %{{.*}}, align 4
-! CHECK: %[[arg3:.*]] = getelementptr i8, ptr %{{.*}}, i32 %5
-! CHECK: call void @llvm.ppc.altivec.stvebx(<16 x i8> %[[arg1]], ptr %[[arg3]])
+! LLVMIR: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load i32, ptr %{{.*}}, align 4
+! LLVMIR: %[[arg3:.*]] = getelementptr i8, ptr %{{.*}}, i32 %5
+! LLVMIR: call void @llvm.ppc.altivec.stvebx(<16 x i8> %[[arg1]], ptr %[[arg3]])
 end subroutine vec_ste_vu1i4u1
 
 ! CHECK-LABEL: vec_ste_vu2i4u2
@@ -321,23 +163,10 @@ subroutine vec_ste_vu2i4u2(arg1, arg2, arg3)
   integer(2) :: arg3
   call vec_ste(arg1, arg2, arg3)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<i32>
-! CHECK-FIR: %[[arg3:.*]] = fir.convert %{{.*}} : (!fir.ref<i16>) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[cnvArg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: fir.call @llvm.ppc.altivec.stvehx(%[[cnvArg1]], %[[addr]]) fastmath<contract> : (vector<8xi16>, !fir.ref<!fir.array<?xi8>>) -> ()
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<i32>
-! CHECK-LLVMIR: %[[arg3:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<i16> to !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3]][%[[arg2]]] : (!llvm.ptr<i8>, i32) -> !llvm.ptr<i8>
-! CHECK-LLVMIR: llvm.call @llvm.ppc.altivec.stvehx(%[[arg1]], %[[addr]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<8xi16>, !llvm.ptr<i8>) -> ()
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load i32, ptr %{{.*}}, align 4
-! CHECK: %[[arg3:.*]] = getelementptr i8, ptr %{{.*}}, i32 %5
-! CHECK: call void @llvm.ppc.altivec.stvehx(<8 x i16> %[[arg1]], ptr %[[arg3]])
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load i32, ptr %{{.*}}, align 4
+! LLVMIR: %[[arg3:.*]] = getelementptr i8, ptr %{{.*}}, i32 %5
+! LLVMIR: call void @llvm.ppc.altivec.stvehx(<8 x i16> %[[arg1]], ptr %[[arg3]])
 end subroutine vec_ste_vu2i4u2
 
 ! CHECK-LABEL: vec_ste_vu4i4u4
@@ -347,23 +176,10 @@ subroutine vec_ste_vu4i4u4(arg1, arg2, arg3)
   integer(4) :: arg3
   call vec_ste(arg1, arg2, arg3)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<i32>
-! CHECK-FIR: %[[arg3:.*]] = fir.convert %{{.*}} : (!fir.ref<i32>) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:ui32>) -> vector<4xi32> 
-! CHECK-FIR: fir.call @llvm.ppc.altivec.stvewx(%[[varg1]], %[[addr]]) fastmath<contract> : (vector<4xi32>, !fir.ref<!fir.array<?xi8>>) -> ()
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<i32>
-! CHECK-LLVMIR: %[[arg3:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<i32> to !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3]][%[[arg2]]] : (!llvm.ptr<i8>, i32) -> !llvm.ptr<i8>
-! CHECK-LLVMIR: llvm.call @llvm.ppc.altivec.stvewx(%[[arg1]], %[[addr]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, !llvm.ptr<i8>) -> ()
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load i32, ptr %{{.*}}, align 4
-! CHECK: %[[arg3:.*]] = getelementptr i8, ptr %{{.*}}, i32 %5
-! CHECK: call void @llvm.ppc.altivec.stvewx(<4 x i32> %[[arg1]], ptr %[[arg3]])
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load i32, ptr %{{.*}}, align 4
+! LLVMIR: %[[arg3:.*]] = getelementptr i8, ptr %{{.*}}, i32 %5
+! LLVMIR: call void @llvm.ppc.altivec.stvewx(<4 x i32> %[[arg1]], ptr %[[arg3]])
 end subroutine vec_ste_vu4i4u4
 
 ! CHECK-LABEL: vec_ste_vr4i4r4
@@ -373,26 +189,11 @@ subroutine vec_ste_vr4i4r4(arg1, arg2, arg3)
   real(4) :: arg3
   call vec_ste(arg1, arg2, arg3)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<i32>
-! CHECK-FIR: %[[arg3:.*]] = fir.convert %arg2 : (!fir.ref<f32>) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[pos:.*]] = fir.coordinate_of %[[arg3]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[cnvArg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[bc:.*]] = vector.bitcast %[[cnvArg1]] : vector<4xf32> to vector<4xi32>
-! CHECK-FIR: fir.call @llvm.ppc.altivec.stvewx(%[[bc]], %[[pos]]) fastmath<contract> : (vector<4xi32>, !fir.ref<!fir.array<?xi8>>) -> ()
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<i32>
-! CHECK-LLVMIR: %[[arg3:.*]] = llvm.bitcast %arg2 : !llvm.ptr<f32> to !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[pos:.*]] = llvm.getelementptr %[[arg3]][%[[arg2]]] : (!llvm.ptr<i8>, i32) -> !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[bc:.*]] = llvm.bitcast %[[arg1]] : vector<4xf32> to vector<4xi32>
-! CHECK-LLVMIR: llvm.call @llvm.ppc.altivec.stvewx(%[[bc]], %[[pos]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, !llvm.ptr<i8>) -> ()
-
-! CHECK: %[[arg1:.*]] = load <4 x float>, ptr %0, align 16
-! CHECK: %[[arg2:.*]] = load i32, ptr %1, align 4
-! CHECK: %[[pos:.*]] = getelementptr i8, ptr %2, i32 %[[arg2]]
-! CHECK: %[[bc:.*]] = bitcast <4 x float> %[[arg1]] to <4 x i32>
-! CHECK: call void @llvm.ppc.altivec.stvewx(<4 x i32> %[[bc]], ptr %[[pos]])
+! LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %0, align 16
+! LLVMIR: %[[arg2:.*]] = load i32, ptr %1, align 4
+! LLVMIR: %[[pos:.*]] = getelementptr i8, ptr %2, i32 %[[arg2]]
+! LLVMIR: %[[bc:.*]] = bitcast <4 x float> %[[arg1]] to <4 x i32>
+! LLVMIR: call void @llvm.ppc.altivec.stvewx(<4 x i32> %[[bc]], ptr %[[pos]])
 
 end subroutine vec_ste_vr4i4r4
 
@@ -403,37 +204,17 @@ subroutine vec_ste_vi4i4ia4(arg1, arg2, arg3, i)
   integer(4) :: arg3(5)
   call vec_ste(arg1, arg2, arg3(i))
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<i32>
-! CHECK-FIR: %[[idx:.*]] = fir.load %{{.*}} : !fir.ref<i32>
-! CHECK-FIR: %[[idx64:.*]] = fir.convert %[[idx]] : (i32) -> i64
-! CHECK-FIR: %[[cnst:.*]] = arith.constant 1 : i64
-! CHECK-FIR: %[[cnstm1:.*]] = arith.subi %[[idx64]], %[[cnst]] : i64
-! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %arg2, %[[cnstm1]] : (!fir.ref<!fir.array<5xi32>>, i64) -> !fir.ref<i32>
-! CHECK-FIR: %[[ref:.*]] = fir.convert %[[addr]] : (!fir.ref<i32>) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[pos:.*]] = fir.coordinate_of %[[ref]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: fir.call @llvm.ppc.altivec.stvewx(%[[varg1]], %[[pos]]) fastmath<contract> : (vector<4xi32>, !fir.ref<!fir.array<?xi8>>) -> ()
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr<i32>
-! CHECK-LLVMIR: %[[idx:.*]] = llvm.load %arg3 : !llvm.ptr<i32>
-! CHECK-LLVMIR: %[[idx64:.*]] = llvm.sext %[[idx]] : i32 to i64
-! CHECK-LLVMIR: %[[one:.*]] = llvm.mlir.constant(1 : i64) : i64
-! CHECK-LLVMIR: %[[idx64m1:.*]] = llvm.sub %[[idx64]], %[[one]]  : i64
-! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %arg2[0, %[[idx64m1]]] : (!llvm.ptr<array<5 x i32>>, i64) -> !llvm.ptr<i32>
-! CHECK-LLVMIR: %[[bc:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr<i32> to !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[pos:.*]] = llvm.getelementptr %[[bc]][%[[arg2]]] : (!llvm.ptr<i8>, i32) -> !llvm.ptr<i8>
-! CHECK-LLVMIR: llvm.call @llvm.ppc.altivec.stvewx(%[[arg1]], %[[pos]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, !llvm.ptr<i8>) -> ()
-
-! CHECK:  %[[arg1:.*]] = load <4 x i32>, ptr %0, align 16
-! CHECK:  %[[arg2:.*]] = load i32, ptr %1, align 4
-! CHECK:  %[[idx:.*]] = load i32, ptr %3, align 4
-! CHECK:  %[[idx64:.*]] = sext i32 %[[idx]] to i64
-! CHECK:  %[[idx64m1:.*]] = sub i64 %[[idx64]], 1
-! CHECK:  %[[addr:.*]] = getelementptr [5 x i32], ptr %[[arg3:.*]], i32 0, i64 %[[idx64m1]]
-! CHECK:  %[[pos:.*]] = getelementptr i8, ptr %[[addr]], i32 %[[arg2]]
-! CHECK:  call void @llvm.ppc.altivec.stvewx(<4 x i32> %[[arg1]], ptr %[[pos]])
+! LLVMIR: %[[i:.*]] = load i32, ptr %3, align 4
+! LLVMIR: %[[iext:.*]] = sext i32 %[[i]] to i64
+! LLVMIR: %[[isub:.*]] = sub i64 %[[iext]], 1
+! LLVMIR: %[[imul1:.*]] = mul i64 %[[isub]], 1
+! LLVMIR: %[[imul2:.*]] = mul i64 %[[imul1]], 1
+! LLVMIR: %[[iadd:.*]] = add i64 %[[imul2]], 0
+! LLVMIR: %[[gep1:.*]] = getelementptr i32, ptr %2, i64 %[[iadd]]
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %0, align 16
+! LLVMIR: %[[arg2:.*]] = load i32, ptr %1, align 4
+! LLVMIR: %[[gep2:.*]] = getelementptr i8, ptr %[[gep1]], i32 %[[arg2]]
+! LLVMIR: call void @llvm.ppc.altivec.stvewx(<4 x i32> %[[arg1]], ptr %[[gep2]])
 end subroutine vec_ste_vi4i4ia4
 
 !----------------------
@@ -447,24 +228,10 @@ subroutine vec_stxv_test_vr4i2r4(arg1, arg2, arg3)
   real(4) :: arg3
   call vec_stxv(arg1, arg2, arg3)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<i16>
-! CHECK-FIR: %[[arg3ptr:.*]] = fir.convert %arg2 : (!fir.ref<f32>) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3ptr]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref<!fir.array<?xi8>>) -> !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: fir.store %[[arg1]] to %[[trg]] {alignment = 1 : i64} : !fir.ref<!fir.vector<4:f32>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<i16>
-! CHECK-LLVMIR: %[[arg3ptr:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<f32> to !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3ptr]][%[[arg2]]] : (!llvm.ptr<i8>, i16) -> !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[trg:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr<i8> to !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: llvm.store %[[arg1]], %[[trg]]
-
-! CHECK: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load i16, ptr %{{.*}}, align 2
-! CHECK: %[[addr:.*]] = getelementptr i8, ptr %{{.*}}, i16 %[[arg2]]
-! CHECK: store <4 x float> %[[arg1]], ptr %[[addr]], align 1
+! LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load i16, ptr %{{.*}}, align 2
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %{{.*}}, i16 %[[arg2]]
+! LLVMIR: store <4 x float> %[[arg1]], ptr %[[addr]], align 16
 end subroutine vec_stxv_test_vr4i2r4
 
 ! CHECK-LABEL: vec_stxv_test_vi4i8ia4
@@ -475,38 +242,17 @@ subroutine vec_stxv_test_vi4i8ia4(arg1, arg2, arg3, i)
   integer(4) :: i
   call vec_stxv(arg1, arg2, arg3(i))
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %arg1 : !fir.ref<i64>
-! CHECK-FIR: %[[idx:.*]] = fir.load %arg3 : !fir.ref<i32>
-! CHECK-FIR: %[[idx64:.*]] = fir.convert %[[idx]] : (i32) -> i64
-! CHECK-FIR: %[[one:.*]] = arith.constant 1 : i64
-! CHECK-FIR: %[[idx64m1:.*]] = arith.subi %[[idx64]], %[[one]] : i64
-! CHECK-FIR: %[[elem:.*]] = fir.coordinate_of %arg2, %[[idx64m1]] : (!fir.ref<!fir.array<10xi32>>, i64) -> !fir.ref<i32>
-! CHECK-FIR: %[[elemref:.*]] = fir.convert %[[elem]] : (!fir.ref<i32>) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[elemref]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref<!fir.array<?xi8>>) -> !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: fir.store %[[arg1]] to %[[trg]] {alignment = 1 : i64} : !fir.ref<!fir.vector<4:i32>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr<i64>
-! CHECK-LLVMIR: %[[idx:.*]] = llvm.load %arg3 : !llvm.ptr<i32>
-! CHECK-LLVMIR: %[[idx64:.*]] = llvm.sext %[[idx]] : i32 to i64
-! CHECK-LLVMIR: %[[one:.*]] = llvm.mlir.constant(1 : i64) : i64
-! CHECK-LLVMIR: %[[idx64m1:.*]] = llvm.sub %[[idx64]], %[[one]]  : i64
-! CHECK-LLVMIR: %[[elem:.*]] = llvm.getelementptr %arg2[0, %[[idx64m1]]] : (!llvm.ptr<array<10 x i32>>, i64) -> !llvm.ptr<i32>
-! CHECK-LLVMIR: %[[elemref:.*]] = llvm.bitcast %[[elem]] : !llvm.ptr<i32> to !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[elemref]][%[[arg2]]] : (!llvm.ptr<i8>, i64) -> !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[trg:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr<i8> to !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: llvm.store %[[arg1]], %[[trg]]
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %0, align 16
-! CHECK: %[[arg2:.*]] = load i64, ptr %1, align 8
-! CHECK: %[[idx:.*]] = load i32, ptr %3, align 4
-! CHECK: %[[idx64:.*]] = sext i32 %[[idx]] to i64
-! CHECK: %[[idx64m1:.*]] = sub i64 %[[idx64]], 1
-! CHECK: %[[elem:.*]] = getelementptr [10 x i32], ptr %2, i32 0, i64 %[[idx64m1]]
-! CHECK: %[[trg:.*]] = getelementptr i8, ptr %[[elem]], i64 %6
-! CHECK: store <4 x i32> %[[arg1]], ptr %[[trg]], align 1
+! LLVMIR: %[[i:.*]] = load i32, ptr %3, align 4
+! LLVMIR: %[[iext:.*]] = sext i32 %[[i]] to i64
+! LLVMIR: %[[isub:.*]] = sub i64 %[[iext]], 1
+! LLVMIR: %[[imul1:.*]] = mul i64 %[[isub]], 1
+! LLVMIR: %[[imul2:.*]] = mul i64 %[[imul1]], 1
+! LLVMIR: %[[iadd:.*]] = add i64 %[[imul2]], 0
+! LLVMIR: %[[gep1:.*]] = getelementptr i32, ptr %2, i64 %[[iadd]]
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %0, align 16
+! LLVMIR: %[[arg2:.*]] = load i64, ptr %1, align 8
+! LLVMIR: %[[gep2:.*]] = getelementptr i8, ptr %[[gep1]], i64 %[[arg2]]
+! LLVMIR: store <4 x i32> %[[arg1]], ptr %[[gep2]], align 16
 end subroutine vec_stxv_test_vi4i8ia4
 
 ! CHECK-LABEL: vec_stxv_test_vi2i4vi2
@@ -516,24 +262,10 @@ subroutine vec_stxv_test_vi2i4vi2(arg1, arg2, arg3)
   vector(integer(2)) :: arg3
   call vec_stxv(arg1, arg2, arg3)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %arg1 : !fir.ref<i32>
-! CHECK-FIR: %[[arg3:.*]] = fir.convert %arg2 : (!fir.ref<!fir.vector<8:i16>>) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref<!fir.array<?xi8>>) -> !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: fir.store %[[arg1]] to %[[trg]] {alignment = 1 : i64} : !fir.ref<!fir.vector<8:i16>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr<i32>
-! CHECK-LLVMIR: %[[arg3:.*]] = llvm.bitcast %arg2 : !llvm.ptr<vector<8xi16>> to !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3]][%[[arg2]]] : (!llvm.ptr<i8>, i32) -> !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[trg:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr<i8> to !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: llvm.store %[[arg1]], %[[trg]]
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %0, align 16
-! CHECK: %[[arg2:.*]] = load i32, ptr %1, align 4
-! CHECK: %[[addr:.*]] = getelementptr i8, ptr %2, i32 %[[arg2]]
-! CHECK:  store <8 x i16> %[[arg1]], ptr %[[addr]], align 1
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %0, align 16
+! LLVMIR: %[[arg2:.*]] = load i32, ptr %1, align 4
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %2, i32 %[[arg2]]
+! LLVMIR: store <8 x i16> %[[arg1]], ptr %[[addr]], align 16
 end subroutine vec_stxv_test_vi2i4vi2
 
 ! CHECK-LABEL: vec_stxv_test_vi4i4vai4
@@ -544,38 +276,17 @@ subroutine vec_stxv_test_vi4i4vai4(arg1, arg2, arg3, i)
   integer(4) :: i
   call vec_stxv(arg1, arg2, arg3(i))
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %arg1 : !fir.ref<i32>
-! CHECK-FIR: %[[idx:.*]] = fir.load %arg3 : !fir.ref<i32>
-! CHECK-FIR: %[[idx64:.*]] = fir.convert %[[idx]] : (i32) -> i64
-! CHECK-FIR: %[[one:.*]] = arith.constant 1 : i64
-! CHECK-FIR: %[[idx64m1:.*]] = arith.subi %[[idx64]], %[[one]] : i64
-! CHECK-FIR: %[[elem:.*]] = fir.coordinate_of %[[arg3:.*]], %[[idx64m1]] : (!fir.ref<!fir.array<20x!fir.vector<4:i32>>>, i64) -> !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[elemptr:.*]] = fir.convert %[[elem]] : (!fir.ref<!fir.vector<4:i32>>) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[elemptr]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref<!fir.array<?xi8>>) -> !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: fir.store %[[arg1]] to %[[trg]] {alignment = 1 : i64} : !fir.ref<!fir.vector<4:i32>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr<i32>
-! CHECK-LLVMIR: %[[idx:.*]] = llvm.load %arg3 : !llvm.ptr<i32>
-! CHECK-LLVMIR: %[[idx64:.*]] = llvm.sext %[[idx]] : i32 to i64
-! CHECK-LLVMIR: %[[one:.*]] = llvm.mlir.constant(1 : i64) : i64
-! CHECK-LLVMIR: %[[idx64m1:.*]] = llvm.sub %[[idx64]], %[[one]]  : i64
-! CHECK-LLVMIR: %[[elem:.*]] = llvm.getelementptr %[[arg3:.*]][0, %[[idx64m1]]] : (!llvm.ptr<array<20 x vector<4xi32>>>, i64) -> !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[elemptr:.*]] = llvm.bitcast %[[elem]] : !llvm.ptr<vector<4xi32>> to !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[elemptr]][%[[arg2]]] : (!llvm.ptr<i8>, i32) -> !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[trg:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr<i8> to !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: llvm.store %[[arg1]], %[[trg]]
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %0, align 16
-! CHECK: %[[arg2:.*]] = load i32, ptr %1, align 4
-! CHECK: %[[idx:.*]] = load i32, ptr %3, align 4
-! CHECK: %[[idx64:.*]] = sext i32 %[[idx]] to i64
-! CHECK: %[[idx64m1:.*]] = sub i64 %[[idx64]], 1
-! CHECK: %[[elem:.*]] = getelementptr [20 x <4 x i32>], ptr %2, i32 0, i64 %[[idx64m1]]
-! CHECK: %[[trg:.*]] = getelementptr i8, ptr %[[elem]], i32 %[[arg2]]
-! CHECK: store <4 x i32> %[[arg1]], ptr %[[trg]], align 1
+! LLVMIR: %[[i:.*]] = load i32, ptr %3, align 4
+! LLVMIR: %[[iext:.*]] = sext i32 %[[i]] to i64
+! LLVMIR: %[[isub:.*]] = sub i64 %[[iext]], 1
+! LLVMIR: %[[imul1:.*]] = mul i64 %[[isub]], 1
+! LLVMIR: %[[imul2:.*]] = mul i64 %[[imul1]], 1
+! LLVMIR: %[[iadd:.*]] = add i64 %[[imul2]], 0
+! LLVMIR: %[[gep1:.*]] = getelementptr <4 x i32>, ptr %2, i64 %[[iadd]]
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %0, align 16
+! LLVMIR: %[[arg2:.*]] = load i32, ptr %1, align 4
+! LLVMIR: %[[gep2:.*]] = getelementptr i8, ptr %[[gep1]], i32 %[[arg2]]
+! LLVMIR: store <4 x i32> %[[arg1]], ptr %[[gep2]], align 16
 end subroutine vec_stxv_test_vi4i4vai4
 
 !----------------------
@@ -589,24 +300,11 @@ subroutine vec_xst_test_vr4i2r4(arg1, arg2, arg3)
   real(4) :: arg3
   call vec_xst(arg1, arg2, arg3)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<i16>
-! CHECK-FIR: %[[arg3ptr:.*]] = fir.convert %arg2 : (!fir.ref<f32>) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3ptr]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref<!fir.array<?xi8>>) -> !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: fir.store %[[arg1]] to %[[trg]] {alignment = 1 : i64} : !fir.ref<!fir.vector<4:f32>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<i16>
-! CHECK-LLVMIR: %[[arg3ptr:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<f32> to !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3ptr]][%[[arg2]]] : (!llvm.ptr<i8>, i16) -> !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[trg:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr<i8> to !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: llvm.store %[[arg1]], %[[trg]]
   
-! CHECK: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load i16, ptr %{{.*}}, align 2
-! CHECK: %[[addr:.*]] = getelementptr i8, ptr %{{.*}}, i16 %[[arg2]]
-! CHECK: store <4 x float> %[[arg1]], ptr %[[addr]], align 1
+! LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load i16, ptr %{{.*}}, align 2
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %{{.*}}, i16 %[[arg2]]
+! LLVMIR: store <4 x float> %[[arg1]], ptr %[[addr]], align 16
 end subroutine vec_xst_test_vr4i2r4
 
 ! CHECK-LABEL: vec_xst_test_vi4i8ia4
@@ -617,38 +315,17 @@ subroutine vec_xst_test_vi4i8ia4(arg1, arg2, arg3, i)
   integer(4) :: i
   call vec_xst(arg1, arg2, arg3(i))
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %arg1 : !fir.ref<i64>
-! CHECK-FIR: %[[idx:.*]] = fir.load %arg3 : !fir.ref<i32>
-! CHECK-FIR: %[[idx64:.*]] = fir.convert %[[idx]] : (i32) -> i64
-! CHECK-FIR: %[[one:.*]] = arith.constant 1 : i64
-! CHECK-FIR: %[[idx64m1:.*]] = arith.subi %[[idx64]], %[[one]] : i64
-! CHECK-FIR: %[[elem:.*]] = fir.coordinate_of %arg2, %[[idx64m1]] : (!fir.ref<!fir.array<10xi32>>, i64) -> !fir.ref<i32>
-! CHECK-FIR: %[[elemref:.*]] = fir.convert %[[elem]] : (!fir.ref<i32>) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[elemref]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref<!fir.array<?xi8>>) -> !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: fir.store %[[arg1]] to %[[trg]] {alignment = 1 : i64} : !fir.ref<!fir.vector<4:i32>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr<i64>
-! CHECK-LLVMIR: %[[idx:.*]] = llvm.load %arg3 : !llvm.ptr<i32>
-! CHECK-LLVMIR: %[[idx64:.*]] = llvm.sext %[[idx]] : i32 to i64
-! CHECK-LLVMIR: %[[one:.*]] = llvm.mlir.constant(1 : i64) : i64
-! CHECK-LLVMIR: %[[idx64m1:.*]] = llvm.sub %[[idx64]], %[[one]]  : i64
-! CHECK-LLVMIR: %[[elem:.*]] = llvm.getelementptr %arg2[0, %[[idx64m1]]] : (!llvm.ptr<array<10 x i32>>, i64) -> !llvm.ptr<i32>
-! CHECK-LLVMIR: %[[elemref:.*]] = llvm.bitcast %[[elem]] : !llvm.ptr<i32> to !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[elemref]][%[[arg2]]] : (!llvm.ptr<i8>, i64) -> !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[trg:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr<i8> to !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: llvm.store %[[arg1]], %[[trg]]
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %0, align 16
-! CHECK: %[[arg2:.*]] = load i64, ptr %1, align 8
-! CHECK: %[[idx:.*]] = load i32, ptr %3, align 4
-! CHECK: %[[idx64:.*]] = sext i32 %[[idx]] to i64
-! CHECK: %[[idx64m1:.*]] = sub i64 %[[idx64]], 1
-! CHECK: %[[elem:.*]] = getelementptr [10 x i32], ptr %2, i32 0, i64 %[[idx64m1]]
-! CHECK: %[[trg:.*]] = getelementptr i8, ptr %[[elem]], i64 %6
-! CHECK: store <4 x i32> %[[arg1]], ptr %[[trg]], align 1
+! LLVMIR: %[[i:.*]] = load i32, ptr %3, align 4
+! LLVMIR: %[[iext:.*]] = sext i32 %[[i]] to i64
+! LLVMIR: %[[isub:.*]] = sub i64 %[[iext]], 1
+! LLVMIR: %[[imul1:.*]] = mul i64 %[[isub]], 1
+! LLVMIR: %[[imul2:.*]] = mul i64 %[[imul1]], 1
+! LLVMIR: %[[iadd:.*]] = add i64 %[[imul2]], 0
+! LLVMIR: %[[gep1:.*]] = getelementptr i32, ptr %2, i64 %[[iadd]]
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %0, align 16
+! LLVMIR: %[[arg2:.*]] = load i64, ptr %1, align 8
+! LLVMIR: %[[gep2:.*]] = getelementptr i8, ptr %[[gep1]], i64 %[[arg2]]
+! LLVMIR: store <4 x i32> %[[arg1]], ptr %[[gep2]], align 16
 end subroutine vec_xst_test_vi4i8ia4
 
 ! CHECK-LABEL: vec_xst_test_vi2i4vi2
@@ -658,24 +335,10 @@ subroutine vec_xst_test_vi2i4vi2(arg1, arg2, arg3)
   vector(integer(2)) :: arg3
   call vec_xst(arg1, arg2, arg3)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %arg1 : !fir.ref<i32>
-! CHECK-FIR: %[[arg3:.*]] = fir.convert %arg2 : (!fir.ref<!fir.vector<8:i16>>) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref<!fir.array<?xi8>>) -> !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: fir.store %[[arg1]] to %[[trg]] {alignment = 1 : i64} : !fir.ref<!fir.vector<8:i16>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr<i32>
-! CHECK-LLVMIR: %[[arg3:.*]] = llvm.bitcast %arg2 : !llvm.ptr<vector<8xi16>> to !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3]][%[[arg2]]] : (!llvm.ptr<i8>, i32) -> !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[trg:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr<i8> to !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: llvm.store %[[arg1]], %[[trg]]
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %0, align 16
-! CHECK: %[[arg2:.*]] = load i32, ptr %1, align 4
-! CHECK: %[[addr:.*]] = getelementptr i8, ptr %2, i32 %[[arg2]]
-! CHECK:  store <8 x i16> %[[arg1]], ptr %[[addr]], align 1
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %0, align 16
+! LLVMIR: %[[arg2:.*]] = load i32, ptr %1, align 4
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %2, i32 %[[arg2]]
+! LLVMIR: store <8 x i16> %[[arg1]], ptr %[[addr]], align 16
 end subroutine vec_xst_test_vi2i4vi2
 
 ! CHECK-LABEL: vec_xst_test_vi4i4vai4
@@ -686,38 +349,17 @@ subroutine vec_xst_test_vi4i4vai4(arg1, arg2, arg3, i)
   integer(4) :: i
   call vec_xst(arg1, arg2, arg3(i))
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %arg1 : !fir.ref<i32>
-! CHECK-FIR: %[[idx:.*]] = fir.load %arg3 : !fir.ref<i32>
-! CHECK-FIR: %[[idx64:.*]] = fir.convert %[[idx]] : (i32) -> i64
-! CHECK-FIR: %[[one:.*]] = arith.constant 1 : i64
-! CHECK-FIR: %[[idx64m1:.*]] = arith.subi %[[idx64]], %[[one]] : i64
-! CHECK-FIR: %[[elem:.*]] = fir.coordinate_of %[[arg3:.*]], %[[idx64m1]] : (!fir.ref<!fir.array<20x!fir.vector<4:i32>>>, i64) -> !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[elemptr:.*]] = fir.convert %[[elem]] : (!fir.ref<!fir.vector<4:i32>>) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[elemptr]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref<!fir.array<?xi8>>) -> !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: fir.store %[[arg1]] to %[[trg]] {alignment = 1 : i64} : !fir.ref<!fir.vector<4:i32>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr<i32>
-! CHECK-LLVMIR: %[[idx:.*]] = llvm.load %arg3 : !llvm.ptr<i32>
-! CHECK-LLVMIR: %[[idx64:.*]] = llvm.sext %[[idx]] : i32 to i64
-! CHECK-LLVMIR: %[[one:.*]] = llvm.mlir.constant(1 : i64) : i64
-! CHECK-LLVMIR: %[[idx64m1:.*]] = llvm.sub %[[idx64]], %[[one]]  : i64
-! CHECK-LLVMIR: %[[elem:.*]] = llvm.getelementptr %[[arg3:.*]][0, %[[idx64m1]]] : (!llvm.ptr<array<20 x vector<4xi32>>>, i64) -> !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[elemptr:.*]] = llvm.bitcast %[[elem]] : !llvm.ptr<vector<4xi32>> to !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[elemptr]][%[[arg2]]] : (!llvm.ptr<i8>, i32) -> !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[trg:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr<i8> to !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: llvm.store %[[arg1]], %[[trg]]
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %0, align 16
-! CHECK: %[[arg2:.*]] = load i32, ptr %1, align 4
-! CHECK: %[[idx:.*]] = load i32, ptr %3, align 4
-! CHECK: %[[idx64:.*]] = sext i32 %[[idx]] to i64
-! CHECK: %[[idx64m1:.*]] = sub i64 %[[idx64]], 1
-! CHECK: %[[elem:.*]] = getelementptr [20 x <4 x i32>], ptr %2, i32 0, i64 %[[idx64m1]]
-! CHECK: %[[trg:.*]] = getelementptr i8, ptr %[[elem]], i32 %[[arg2]]
-! CHECK: store <4 x i32> %[[arg1]], ptr %[[trg]], align 1
+! LLVMIR: %[[i:.*]] = load i32, ptr %3, align 4
+! LLVMIR: %[[iext:.*]] = sext i32 %[[i]] to i64
+! LLVMIR: %[[isub:.*]] = sub i64 %[[iext]], 1
+! LLVMIR: %[[imul1:.*]] = mul i64 %[[isub]], 1
+! LLVMIR: %[[imul2:.*]] = mul i64 %[[imul1]], 1
+! LLVMIR: %[[iadd:.*]] = add i64 %[[imul2]], 0
+! LLVMIR: %[[gep1:.*]] = getelementptr <4 x i32>, ptr %2, i64 %[[iadd]]
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %0, align 16
+! LLVMIR: %[[arg2:.*]] = load i32, ptr %1, align 4
+! LLVMIR: %[[gep2:.*]] = getelementptr i8, ptr %[[gep1]], i32 %[[arg2]]
+! LLVMIR: store <4 x i32> %[[arg1]], ptr %[[gep2]], align 16
 end subroutine vec_xst_test_vi4i4vai4
 
 !----------------------
@@ -731,31 +373,11 @@ subroutine vec_xst_be_test_vr4i2r4(arg1, arg2, arg3)
   real(4) :: arg3
   call vec_xst_be(arg1, arg2, arg3)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<i16>
-! CHECK-FIR: %[[arg3ptr:.*]] = fir.convert %arg2 : (!fir.ref<f32>) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3ptr]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref<!fir.array<?xi8>>) -> !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[undef:.*]] = fir.undefined vector<4xf32>
-! CHECK-FIR: %[[shf:.*]] = vector.shuffle %[[varg1]], %[[undef]] [3, 2, 1, 0] : vector<4xf32>, vector<4xf32>
-! CHECK-FIR: %[[fvarg1:.*]] = fir.convert %[[shf]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! CHECK-FIR: fir.store %[[fvarg1]] to %[[trg]] {alignment = 1 : i64} : !fir.ref<!fir.vector<4:f32>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<i16>
-! CHECK-LLVMIR: %[[arg3ptr:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<f32> to !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3ptr]][%[[arg2]]] : (!llvm.ptr<i8>, i16) -> !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[trg:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr<i8> to !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[undef:.*]] = llvm.mlir.undef : vector<4xf32>
-! CHECK-LLVMIR: %[[shf:.*]] = llvm.shufflevector %[[arg1]], %[[undef]] [3, 2, 1, 0] : vector<4xf32>
-! CHECK-LLVMIR: llvm.store %[[shf]], %[[trg]]
-
-! CHECK: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load i16, ptr %{{.*}}, align 2
-! CHECK: %[[addr:.*]] = getelementptr i8, ptr %{{.*}}, i16 %[[arg2]]
-! CHECK: %[[shf:.*]] = shufflevector <4 x float> %[[arg1]], <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-! CHECK: store <4 x float> %[[shf]], ptr %[[addr]], align 1
+! LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load i16, ptr %{{.*}}, align 2
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %{{.*}}, i16 %[[arg2]]
+! LLVMIR: %[[shf:.*]] = shufflevector <4 x float> %[[arg1]], <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+! LLVMIR: store <4 x float> %[[shf]], ptr %[[addr]], align 16
 end subroutine vec_xst_be_test_vr4i2r4
 
 ! CHECK-LABEL: vec_xst_be_test_vi4i8ia4
@@ -766,45 +388,18 @@ subroutine vec_xst_be_test_vi4i8ia4(arg1, arg2, arg3, i)
   integer(4) :: i
   call vec_xst_be(arg1, arg2, arg3(i))
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %arg1 : !fir.ref<i64>
-! CHECK-FIR: %[[idx:.*]] = fir.load %arg3 : !fir.ref<i32>
-! CHECK-FIR: %[[idx64:.*]] = fir.convert %[[idx]] : (i32) -> i64
-! CHECK-FIR: %[[one:.*]] = arith.constant 1 : i64
-! CHECK-FIR: %[[idx64m1:.*]] = arith.subi %[[idx64]], %[[one]] : i64
-! CHECK-FIR: %[[elem:.*]] = fir.coordinate_of %arg2, %[[idx64m1]] : (!fir.ref<!fir.array<10xi32>>, i64) -> !fir.ref<i32>
-! CHECK-FIR: %[[elemref:.*]] = fir.convert %[[elem]] : (!fir.ref<i32>) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[elemref]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref<!fir.array<?xi8>>) -> !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[undef:.*]] = fir.undefined vector<4xi32>
-! CHECK-FIR: %[[shf:.*]] = vector.shuffle %[[varg1]], %[[undef]] [3, 2, 1, 0] : vector<4xi32>, vector<4xi32>
-! CHECK-FIR: %[[src:.*]] = fir.convert %[[shf]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: fir.store %[[src]] to %[[trg]] {alignment = 1 : i64} : !fir.ref<!fir.vector<4:i32>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr<i64>
-! CHECK-LLVMIR: %[[idx:.*]] = llvm.load %arg3 : !llvm.ptr<i32>
-! CHECK-LLVMIR: %[[idx64:.*]] = llvm.sext %[[idx]] : i32 to i64
-! CHECK-LLVMIR: %[[one:.*]] = llvm.mlir.constant(1 : i64) : i64
-! CHECK-LLVMIR: %[[idx64m1:.*]] = llvm.sub %[[idx64]], %[[one]]  : i64
-! CHECK-LLVMIR: %[[elem:.*]] = llvm.getelementptr %arg2[0, %[[idx64m1]]] : (!llvm.ptr<array<10 x i32>>, i64) -> !llvm.ptr<i32>
-! CHECK-LLVMIR: %[[elemref:.*]] = llvm.bitcast %[[elem]] : !llvm.ptr<i32> to !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[elemref]][%[[arg2]]] : (!llvm.ptr<i8>, i64) -> !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[trg:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr<i8> to !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[undef:.*]] = llvm.mlir.undef : vector<4xi32>
-! CHECK-LLVMIR: %[[src:.*]] = llvm.shufflevector %[[arg1]], %[[undef]] [3, 2, 1, 0] : vector<4xi32>
-! CHECK-LLVMIR: llvm.store %[[src]], %[[trg]]
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %0, align 16
-! CHECK: %[[arg2:.*]] = load i64, ptr %1, align 8
-! CHECK: %[[idx:.*]] = load i32, ptr %3, align 4
-! CHECK: %[[idx64:.*]] = sext i32 %[[idx]] to i64
-! CHECK: %[[idx64m1:.*]] = sub i64 %[[idx64]], 1
-! CHECK: %[[elem:.*]] = getelementptr [10 x i32], ptr %2, i32 0, i64 %[[idx64m1]]
-! CHECK: %[[trg:.*]] = getelementptr i8, ptr %[[elem]], i64 %6
-! CHECK: %[[src:.*]] = shufflevector <4 x i32> %[[arg1]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-! CHECK: store <4 x i32> %[[src]], ptr %[[trg]], align 1
+! LLVMIR: %[[i:.*]] = load i32, ptr %3, align 4
+! LLVMIR: %[[iext:.*]] = sext i32 %[[i]] to i64
+! LLVMIR: %[[isub:.*]] = sub i64 %[[iext]], 1
+! LLVMIR: %[[imul1:.*]] = mul i64 %[[isub]], 1
+! LLVMIR: %[[imul2:.*]] = mul i64 %[[imul1]], 1
+! LLVMIR: %[[iadd:.*]] = add i64 %[[imul2]], 0
+! LLVMIR: %[[gep1:.*]] = getelementptr i32, ptr %2, i64 %[[iadd]]
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %0, align 16
+! LLVMIR: %[[arg2:.*]] = load i64, ptr %1, align 8
+! LLVMIR: %[[gep2:.*]] = getelementptr i8, ptr %[[gep1]], i64 %[[arg2]]
+! LLVMIR: %[[src:.*]] = shufflevector <4 x i32> %[[arg1]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+! LLVMIR: store <4 x i32> %[[src]], ptr %[[gep2]], align 16
 end subroutine vec_xst_be_test_vi4i8ia4
 
 ! CHECK-LABEL: vec_xst_be_test_vi2i4vi2
@@ -814,31 +409,11 @@ subroutine vec_xst_be_test_vi2i4vi2(arg1, arg2, arg3)
   vector(integer(2)) :: arg3
   call vec_xst_be(arg1, arg2, arg3)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %arg1 : !fir.ref<i32>
-! CHECK-FIR: %[[arg3:.*]] = fir.convert %arg2 : (!fir.ref<!fir.vector<8:i16>>) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref<!fir.array<?xi8>>) -> !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[undef:.*]] = fir.undefined vector<8xi16>
-! CHECK-FIR: %[[shf:.*]] = vector.shuffle %[[varg1]], %[[undef]] [7, 6, 5, 4, 3, 2, 1, 0] : vector<8xi16>, vector<8xi16>
-! CHECK-FIR: %[[src:.*]] = fir.convert %[[shf]] : (vector<8xi16>) -> !fir.vector<8:i16>
-! CHECK-FIR: fir.store %[[src]] to %[[trg]] {alignment = 1 : i64} : !fir.ref<!fir.vector<8:i16>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr<i32>
-! CHECK-LLVMIR: %[[arg3:.*]] = llvm.bitcast %arg2 : !llvm.ptr<vector<8xi16>> to !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3]][%[[arg2]]] : (!llvm.ptr<i8>, i32) -> !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[trg:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr<i8> to !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[undef:.*]] = llvm.mlir.undef : vector<8xi16>
-! CHECK-LLVMIR: %[[src:.*]] = llvm.shufflevector %[[arg1]], %[[undef]] [7, 6, 5, 4, 3, 2, 1, 0] : vector<8xi16>
-! CHECK-LLVMIR: llvm.store %[[src]], %[[trg]]
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %0, align 16
-! CHECK: %[[arg2:.*]] = load i32, ptr %1, align 4
-! CHECK: %[[addr:.*]] = getelementptr i8, ptr %2, i32 %[[arg2]]
-! CHECK: %[[src:.*]] = shufflevector <8 x i16> %[[arg1]], <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-! CHECK:  store <8 x i16> %[[src]], ptr %[[addr]], align 1
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %0, align 16
+! LLVMIR: %[[arg2:.*]] = load i32, ptr %1, align 4
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %2, i32 %[[arg2]]
+! LLVMIR: %[[src:.*]] = shufflevector <8 x i16> %[[arg1]], <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+! LLVMIR: store <8 x i16> %[[src]], ptr %[[addr]], align 16
 end subroutine vec_xst_be_test_vi2i4vi2
 
 ! CHECK-LABEL: vec_xst_be_test_vi4i4vai4
@@ -849,45 +424,18 @@ subroutine vec_xst_be_test_vi4i4vai4(arg1, arg2, arg3, i)
   integer(4) :: i
   call vec_xst_be(arg1, arg2, arg3(i))
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %arg1 : !fir.ref<i32>
-! CHECK-FIR: %[[idx:.*]] = fir.load %arg3 : !fir.ref<i32>
-! CHECK-FIR: %[[idx64:.*]] = fir.convert %[[idx]] : (i32) -> i64
-! CHECK-FIR: %[[one:.*]] = arith.constant 1 : i64
-! CHECK-FIR: %[[idx64m1:.*]] = arith.subi %[[idx64]], %[[one]] : i64
-! CHECK-FIR: %[[elem:.*]] = fir.coordinate_of %[[arg3:.*]], %[[idx64m1]] : (!fir.ref<!fir.array<20x!fir.vector<4:i32>>>, i64) -> !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[elemptr:.*]] = fir.convert %[[elem]] : (!fir.ref<!fir.vector<4:i32>>) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[elemptr]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref<!fir.array<?xi8>>) -> !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[undef:.*]] = fir.undefined vector<4xi32>
-! CHECK-FIR: %[[shf:.*]] = vector.shuffle %[[varg1]], %[[undef]] [3, 2, 1, 0] : vector<4xi32>, vector<4xi32>
-! CHECK-FIR: %[[src:.*]] = fir.convert %[[shf]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: fir.store %[[src]] to %[[trg]] {alignment = 1 : i64} : !fir.ref<!fir.vector<4:i32>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr<i32>
-! CHECK-LLVMIR: %[[idx:.*]] = llvm.load %arg3 : !llvm.ptr<i32>
-! CHECK-LLVMIR: %[[idx64:.*]] = llvm.sext %[[idx]] : i32 to i64
-! CHECK-LLVMIR: %[[one:.*]] = llvm.mlir.constant(1 : i64) : i64
-! CHECK-LLVMIR: %[[idx64m1:.*]] = llvm.sub %[[idx64]], %[[one]]  : i64
-! CHECK-LLVMIR: %[[elem:.*]] = llvm.getelementptr %[[arg3:.*]][0, %[[idx64m1]]] : (!llvm.ptr<array<20 x vector<4xi32>>>, i64) -> !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[elemptr:.*]] = llvm.bitcast %[[elem]] : !llvm.ptr<vector<4xi32>> to !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[elemptr]][%[[arg2]]] : (!llvm.ptr<i8>, i32) -> !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[trg:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr<i8> to !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[undef:.*]] = llvm.mlir.undef : vector<4xi32>
-! CHECK-LLVMIR: %[[src:.*]] = llvm.shufflevector %[[arg1]], %[[undef]] [3, 2, 1, 0] : vector<4xi32>
-! CHECK-LLVMIR: llvm.store %[[src]], %[[trg]]
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %0, align 16
-! CHECK: %[[arg2:.*]] = load i32, ptr %1, align 4
-! CHECK: %[[idx:.*]] = load i32, ptr %3, align 4
-! CHECK: %[[idx64:.*]] = sext i32 %[[idx]] to i64
-! CHECK: %[[idx64m1:.*]] = sub i64 %[[idx64]], 1
-! CHECK: %[[elem:.*]] = getelementptr [20 x <4 x i32>], ptr %2, i32 0, i64 %[[idx64m1]]
-! CHECK: %[[trg:.*]] = getelementptr i8, ptr %[[elem]], i32 %[[arg2]]
-! CHECK: %[[src:.*]] = shufflevector <4 x i32> %[[arg1]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-! CHECK: store <4 x i32> %[[src]], ptr %[[trg]], align 1
+! LLVMIR: %[[i:.*]] = load i32, ptr %3, align 4
+! LLVMIR: %[[iext:.*]] = sext i32 %[[i]] to i64
+! LLVMIR: %[[isub:.*]] = sub i64 %[[iext]], 1
+! LLVMIR: %[[imul1:.*]] = mul i64 %[[isub]], 1
+! LLVMIR: %[[imul2:.*]] = mul i64 %[[imul1]], 1
+! LLVMIR: %[[iadd:.*]] = add i64 %[[imul2]], 0
+! LLVMIR: %[[gep1:.*]] = getelementptr <4 x i32>, ptr %2, i64 %[[iadd]]
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %0, align 16
+! LLVMIR: %[[arg2:.*]] = load i32, ptr %1, align 4
+! LLVMIR: %[[gep2:.*]] = getelementptr i8, ptr %[[gep1]], i32 %[[arg2]]
+! LLVMIR: %[[src:.*]] = shufflevector <4 x i32> %[[arg1]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+! LLVMIR: store <4 x i32> %[[src]], ptr %[[gep2]], align 16
 end subroutine vec_xst_be_test_vi4i4vai4
 
 !----------------------
@@ -901,29 +449,12 @@ subroutine vec_xstd2_test_vr4i2r4(arg1, arg2, arg3)
   real(4) :: arg3
   call vec_xstd2(arg1, arg2, arg3)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<i16>
-! CHECK-FIR: %[[arg3ptr:.*]] = fir.convert %arg2 : (!fir.ref<f32>) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3ptr]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[vsrc:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[bcsrc:.*]] = vector.bitcast %[[vsrc]] : vector<4xf32> to vector<2xi64>
-! CHECK-FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref<!fir.array<?xi8>>) -> !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[cnvsrc:.*]] = fir.convert %[[bcsrc]] : (vector<2xi64>) -> !fir.vector<2:i64>
-! CHECK-FIR: fir.store %[[cnvsrc]] to %[[trg]] {alignment = 1 : i64} : !fir.ref<!fir.vector<2:i64>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<i16>
-! CHECK-LLVMIR: %[[arg3ptr:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<f32> to !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3ptr]][%[[arg2]]] : (!llvm.ptr<i8>, i16) -> !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[src:.*]] = llvm.bitcast %[[arg1]] : vector<4xf32> to vector<2xi64>
-! CHECK-LLVMIR: %[[trg:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr<i8> to !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: llvm.store %[[src]], %[[trg]]
   
-! CHECK: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load i16, ptr %{{.*}}, align 2
-! CHECK: %[[addr:.*]] = getelementptr i8, ptr %{{.*}}, i16 %[[arg2]]
-! CHECK: %[[src:.*]] = bitcast <4 x float> %[[arg1]] to <2 x i64>
-! CHECK: store <2 x i64> %[[src]], ptr %[[addr]], align 1
+! LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load i16, ptr %{{.*}}, align 2
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %{{.*}}, i16 %[[arg2]]
+! LLVMIR: %[[src:.*]] = bitcast <4 x float> %[[arg1]] to <2 x i64>
+! LLVMIR: store <2 x i64> %[[src]], ptr %[[addr]], align 16
 end subroutine vec_xstd2_test_vr4i2r4
 
 ! CHECK-LABEL: vec_xstd2_test_vi4i8ia4
@@ -934,43 +465,18 @@ subroutine vec_xstd2_test_vi4i8ia4(arg1, arg2, arg3, i)
   integer(4) :: i
   call vec_xstd2(arg1, arg2, arg3(i))
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %arg1 : !fir.ref<i64>
-! CHECK-FIR: %[[idx:.*]] = fir.load %arg3 : !fir.ref<i32>
-! CHECK-FIR: %[[idx64:.*]] = fir.convert %[[idx]] : (i32) -> i64
-! CHECK-FIR: %[[one:.*]] = arith.constant 1 : i64
-! CHECK-FIR: %[[idx64m1:.*]] = arith.subi %[[idx64]], %[[one]] : i64
-! CHECK-FIR: %[[elem:.*]] = fir.coordinate_of %arg2, %[[idx64m1]] : (!fir.ref<!fir.array<10xi32>>, i64) -> !fir.ref<i32>
-! CHECK-FIR: %[[elemref:.*]] = fir.convert %[[elem]] : (!fir.ref<i32>) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[elemref]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[vsrc:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcsrc:.*]] = vector.bitcast %[[vsrc]] : vector<4xi32> to vector<2xi64>
-! CHECK-FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref<!fir.array<?xi8>>) -> !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[cnvsrc:.*]] = fir.convert %[[bcsrc]] : (vector<2xi64>) -> !fir.vector<2:i64>
-! CHECK-FIR: fir.store %[[cnvsrc]] to %[[trg]] {alignment = 1 : i64} : !fir.ref<!fir.vector<2:i64>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr<i64>
-! CHECK-LLVMIR: %[[idx:.*]] = llvm.load %arg3 : !llvm.ptr<i32>
-! CHECK-LLVMIR: %[[idx64:.*]] = llvm.sext %[[idx]] : i32 to i64
-! CHECK-LLVMIR: %[[one:.*]] = llvm.mlir.constant(1 : i64) : i64
-! CHECK-LLVMIR: %[[idx64m1:.*]] = llvm.sub %[[idx64]], %[[one]]  : i64
-! CHECK-LLVMIR: %[[elem:.*]] = llvm.getelementptr %arg2[0, %[[idx64m1]]] : (!llvm.ptr<array<10 x i32>>, i64) -> !llvm.ptr<i32>
-! CHECK-LLVMIR: %[[elemref:.*]] = llvm.bitcast %[[elem]] : !llvm.ptr<i32> to !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[elemref]][%[[arg2]]] : (!llvm.ptr<i8>, i64) -> !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[src:.*]] = llvm.bitcast %[[arg1]] : vector<4xi32> to vector<2xi64>
-! CHECK-LLVMIR: %[[trg:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr<i8> to !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: llvm.store %[[src]], %[[trg]]
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %0, align 16
-! CHECK: %[[arg2:.*]] = load i64, ptr %1, align 8
-! CHECK: %[[idx:.*]] = load i32, ptr %3, align 4
-! CHECK: %[[idx64:.*]] = sext i32 %[[idx]] to i64
-! CHECK: %[[idx64m1:.*]] = sub i64 %[[idx64]], 1
-! CHECK: %[[elem:.*]] = getelementptr [10 x i32], ptr %2, i32 0, i64 %[[idx64m1]]
-! CHECK: %[[trg:.*]] = getelementptr i8, ptr %[[elem]], i64 %6
-! CHECK: %[[src:.*]] = bitcast <4 x i32> %[[arg1]] to <2 x i64>
-! CHECK: store <2 x i64> %[[src]], ptr %[[trg]], align 1
+! LLVMIR: %[[i:.*]] = load i32, ptr %3, align 4
+! LLVMIR: %[[iext:.*]] = sext i32 %[[i]] to i64
+! LLVMIR: %[[isub:.*]] = sub i64 %[[iext]], 1
+! LLVMIR: %[[imul1:.*]] = mul i64 %[[isub]], 1
+! LLVMIR: %[[imul2:.*]] = mul i64 %[[imul1]], 1
+! LLVMIR: %[[iadd:.*]] = add i64 %[[imul2]], 0
+! LLVMIR: %[[gep1:.*]] = getelementptr i32, ptr %2, i64 %[[iadd]]
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %0, align 16
+! LLVMIR: %[[arg2:.*]] = load i64, ptr %1, align 8
+! LLVMIR: %[[gep2:.*]] = getelementptr i8, ptr %[[gep1]], i64 %[[arg2]]
+! LLVMIR: %[[src:.*]] = bitcast <4 x i32> %[[arg1]] to <2 x i64>
+! LLVMIR: store <2 x i64> %[[src]], ptr %[[gep2]], align 16
 end subroutine vec_xstd2_test_vi4i8ia4
 
 ! CHECK-LABEL: vec_xstd2_test_vi2i4vi2
@@ -980,29 +486,11 @@ subroutine vec_xstd2_test_vi2i4vi2(arg1, arg2, arg3)
   vector(integer(2)) :: arg3
   call vec_xstd2(arg1, arg2, arg3)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %arg1 : !fir.ref<i32>
-! CHECK-FIR: %[[arg3:.*]] = fir.convert %arg2 : (!fir.ref<!fir.vector<8:i16>>) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[vsrc:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[bcsrc:.*]] = vector.bitcast %[[vsrc]] : vector<8xi16> to vector<2xi64>
-! CHECK-FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref<!fir.array<?xi8>>) -> !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[cnvsrc:.*]] = fir.convert %[[bcsrc]] : (vector<2xi64>) -> !fir.vector<2:i64>
-! CHECK-FIR: fir.store %[[cnvsrc]] to %[[trg]] {alignment = 1 : i64} : !fir.ref<!fir.vector<2:i64>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr<i32>
-! CHECK-LLVMIR: %[[arg3:.*]] = llvm.bitcast %arg2 : !llvm.ptr<vector<8xi16>> to !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3]][%[[arg2]]] : (!llvm.ptr<i8>, i32) -> !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[src:.*]] = llvm.bitcast %[[arg1]] : vector<8xi16> to vector<2xi64>
-! CHECK-LLVMIR: %[[trg:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr<i8> to !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: llvm.store %[[src]], %[[trg]]
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %0, align 16
-! CHECK: %[[arg2:.*]] = load i32, ptr %1, align 4
-! CHECK: %[[addr:.*]] = getelementptr i8, ptr %2, i32 %[[arg2]]
-! CHECK: %[[src:.*]] = bitcast <8 x i16> %[[arg1]] to <2 x i64>
-! CHECK:  store <2 x i64> %[[src]], ptr %[[addr]], align 1
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %0, align 16
+! LLVMIR: %[[arg2:.*]] = load i32, ptr %1, align 4
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %2, i32 %[[arg2]]
+! LLVMIR: %[[src:.*]] = bitcast <8 x i16> %[[arg1]] to <2 x i64>
+! LLVMIR: store <2 x i64> %[[src]], ptr %[[addr]], align 16
 end subroutine vec_xstd2_test_vi2i4vi2
 
 ! CHECK-LABEL: vec_xstd2_test_vi4i4vai4
@@ -1013,43 +501,18 @@ subroutine vec_xstd2_test_vi4i4vai4(arg1, arg2, arg3, i)
   integer(4) :: i
   call vec_xstd2(arg1, arg2, arg3(i))
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %arg1 : !fir.ref<i32>
-! CHECK-FIR: %[[idx:.*]] = fir.load %arg3 : !fir.ref<i32>
-! CHECK-FIR: %[[idx64:.*]] = fir.convert %[[idx]] : (i32) -> i64
-! CHECK-FIR: %[[one:.*]] = arith.constant 1 : i64
-! CHECK-FIR: %[[idx64m1:.*]] = arith.subi %[[idx64]], %[[one]] : i64
-! CHECK-FIR: %[[elem:.*]] = fir.coordinate_of %[[arg3:.*]], %[[idx64m1]] : (!fir.ref<!fir.array<20x!fir.vector<4:i32>>>, i64) -> !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[elemptr:.*]] = fir.convert %[[elem]] : (!fir.ref<!fir.vector<4:i32>>) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[elemptr]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[vsrc:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[bcsrc:.*]] = vector.bitcast %[[vsrc]] : vector<4xi32> to vector<2xi64>
-! CHECK-FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref<!fir.array<?xi8>>) -> !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[cnvsrc:.*]] = fir.convert %[[bcsrc]] : (vector<2xi64>) -> !fir.vector<2:i64>
-! CHECK-FIR: fir.store %[[cnvsrc]] to %[[trg]] {alignment = 1 : i64} : !fir.ref<!fir.vector<2:i64>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr<i32>
-! CHECK-LLVMIR: %[[idx:.*]] = llvm.load %arg3 : !llvm.ptr<i32>
-! CHECK-LLVMIR: %[[idx64:.*]] = llvm.sext %[[idx]] : i32 to i64
-! CHECK-LLVMIR: %[[one:.*]] = llvm.mlir.constant(1 : i64) : i64
-! CHECK-LLVMIR: %[[idx64m1:.*]] = llvm.sub %[[idx64]], %[[one]]  : i64
-! CHECK-LLVMIR: %[[elem:.*]] = llvm.getelementptr %[[arg3:.*]][0, %[[idx64m1]]] : (!llvm.ptr<array<20 x vector<4xi32>>>, i64) -> !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[elemptr:.*]] = llvm.bitcast %[[elem]] : !llvm.ptr<vector<4xi32>> to !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[elemptr]][%[[arg2]]] : (!llvm.ptr<i8>, i32) -> !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[src:.*]] = llvm.bitcast %[[arg1]] : vector<4xi32> to vector<2xi64>
-! CHECK-LLVMIR: %[[trg:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr<i8> to !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: llvm.store %[[src]], %[[trg]]
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %0, align 16
-! CHECK: %[[arg2:.*]] = load i32, ptr %1, align 4
-! CHECK: %[[idx:.*]] = load i32, ptr %3, align 4
-! CHECK: %[[idx64:.*]] = sext i32 %[[idx]] to i64
-! CHECK: %[[idx64m1:.*]] = sub i64 %[[idx64]], 1
-! CHECK: %[[elem:.*]] = getelementptr [20 x <4 x i32>], ptr %2, i32 0, i64 %[[idx64m1]]
-! CHECK: %[[trg:.*]] = getelementptr i8, ptr %[[elem]], i32 %[[arg2]]
-! CHECK: %[[src:.*]] = bitcast <4 x i32> %[[arg1]] to <2 x i64>
-! CHECK: store <2 x i64> %[[src]], ptr %[[trg]], align 1
+! LLVMIR: %[[i:.*]] = load i32, ptr %3, align 4
+! LLVMIR: %[[iext:.*]] = sext i32 %[[i]] to i64
+! LLVMIR: %[[isub:.*]] = sub i64 %[[iext]], 1
+! LLVMIR: %[[imul1:.*]] = mul i64 %[[isub]], 1
+! LLVMIR: %[[imul2:.*]] = mul i64 %[[imul1]], 1
+! LLVMIR: %[[iadd:.*]] = add i64 %[[imul2]], 0
+! LLVMIR: %[[gep1:.*]] = getelementptr <4 x i32>, ptr %2, i64 %[[iadd]]
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %0, align 16
+! LLVMIR: %[[arg2:.*]] = load i32, ptr %1, align 4
+! LLVMIR: %[[gep2:.*]] = getelementptr i8, ptr %[[gep1]], i32 %[[arg2]]
+! LLVMIR: %[[src:.*]] = bitcast <4 x i32> %[[arg1]] to <2 x i64>
+! LLVMIR: store <2 x i64> %[[src]], ptr %[[gep2]], align 16
 end subroutine vec_xstd2_test_vi4i4vai4
 
 !----------------------
@@ -1063,26 +526,11 @@ subroutine vec_xstw4_test_vr4i2r4(arg1, arg2, arg3)
   real(4) :: arg3
   call vec_xstw4(arg1, arg2, arg3)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<i16>
-! CHECK-FIR: %[[arg3ptr:.*]] = fir.convert %arg2 : (!fir.ref<f32>) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3ptr]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[vsrc:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref<!fir.array<?xi8>>) -> !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[cnvsrc:.*]] = fir.convert %[[vsrc]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! CHECK-FIR: fir.store %[[cnvsrc]] to %[[trg]] {alignment = 1 : i64} : !fir.ref<!fir.vector<4:f32>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<i16>
-! CHECK-LLVMIR: %[[arg3ptr:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<f32> to !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3ptr]][%[[arg2]]] : (!llvm.ptr<i8>, i16) -> !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[trg:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr<i8> to !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: llvm.store %[[arg1]], %[[trg]]
   
-! CHECK: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load i16, ptr %{{.*}}, align 2
-! CHECK: %[[addr:.*]] = getelementptr i8, ptr %{{.*}}, i16 %[[arg2]]
-! CHECK: store <4 x float> %[[arg1]], ptr %[[addr]], align 1
+! LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! LLVMIR: %[[arg2:.*]] = load i16, ptr %{{.*}}, align 2
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %{{.*}}, i16 %[[arg2]]
+! LLVMIR: store <4 x float> %[[arg1]], ptr %[[addr]], align 16
 end subroutine vec_xstw4_test_vr4i2r4
 
 ! CHECK-LABEL: vec_xstw4_test_vi4i8ia4
@@ -1093,40 +541,17 @@ subroutine vec_xstw4_test_vi4i8ia4(arg1, arg2, arg3, i)
   integer(4) :: i
   call vec_xstw4(arg1, arg2, arg3(i))
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %arg1 : !fir.ref<i64>
-! CHECK-FIR: %[[idx:.*]] = fir.load %arg3 : !fir.ref<i32>
-! CHECK-FIR: %[[idx64:.*]] = fir.convert %[[idx]] : (i32) -> i64
-! CHECK-FIR: %[[one:.*]] = arith.constant 1 : i64
-! CHECK-FIR: %[[idx64m1:.*]] = arith.subi %[[idx64]], %[[one]] : i64
-! CHECK-FIR: %[[elem:.*]] = fir.coordinate_of %arg2, %[[idx64m1]] : (!fir.ref<!fir.array<10xi32>>, i64) -> !fir.ref<i32>
-! CHECK-FIR: %[[elemref:.*]] = fir.convert %[[elem]] : (!fir.ref<i32>) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[elemref]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[vsrc:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref<!fir.array<?xi8>>) -> !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[cnvsrc:.*]] = fir.convert %[[vsrc]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: fir.store %[[cnvsrc]] to %[[trg]]
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr<i64>
-! CHECK-LLVMIR: %[[idx:.*]] = llvm.load %arg3 : !llvm.ptr<i32>
-! CHECK-LLVMIR: %[[idx64:.*]] = llvm.sext %[[idx]] : i32 to i64
-! CHECK-LLVMIR: %[[one:.*]] = llvm.mlir.constant(1 : i64) : i64
-! CHECK-LLVMIR: %[[idx64m1:.*]] = llvm.sub %[[idx64]], %[[one]]  : i64
-! CHECK-LLVMIR: %[[elem:.*]] = llvm.getelementptr %arg2[0, %[[idx64m1]]] : (!llvm.ptr<array<10 x i32>>, i64) -> !llvm.ptr<i32>
-! CHECK-LLVMIR: %[[elemref:.*]] = llvm.bitcast %[[elem]] : !llvm.ptr<i32> to !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[elemref]][%[[arg2]]] : (!llvm.ptr<i8>, i64) -> !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[trg:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr<i8> to !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: llvm.store %[[arg1]], %[[trg]]
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %0, align 16
-! CHECK: %[[arg2:.*]] = load i64, ptr %1, align 8
-! CHECK: %[[idx:.*]] = load i32, ptr %3, align 4
-! CHECK: %[[idx64:.*]] = sext i32 %[[idx]] to i64
-! CHECK: %[[idx64m1:.*]] = sub i64 %[[idx64]], 1
-! CHECK: %[[elem:.*]] = getelementptr [10 x i32], ptr %2, i32 0, i64 %[[idx64m1]]
-! CHECK: %[[trg:.*]] = getelementptr i8, ptr %[[elem]], i64 %6
-! CHECK: store <4 x i32> %[[arg1]], ptr %[[trg]], align 1
+! LLVMIR: %[[i:.*]] = load i32, ptr %3, align 4
+! LLVMIR: %[[iext:.*]] = sext i32 %[[i]] to i64
+! LLVMIR: %[[isub:.*]] = sub i64 %[[iext]], 1
+! LLVMIR: %[[imul1:.*]] = mul i64 %[[isub]], 1
+! LLVMIR: %[[imul2:.*]] = mul i64 %[[imul1]], 1
+! LLVMIR: %[[iadd:.*]] = add i64 %[[imul2]], 0
+! LLVMIR: %[[gep1:.*]] = getelementptr i32, ptr %2, i64 %[[iadd]]
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %0, align 16
+! LLVMIR: %[[arg2:.*]] = load i64, ptr %1, align 8
+! LLVMIR: %[[gep2:.*]] = getelementptr i8, ptr %[[gep1]], i64 %[[arg2]]
+! LLVMIR: store <4 x i32> %[[arg1]], ptr %[[gep2]], align 16
 end subroutine vec_xstw4_test_vi4i8ia4
 
 ! CHECK-LABEL: vec_xstw4_test_vi2i4vi2
@@ -1136,29 +561,11 @@ subroutine vec_xstw4_test_vi2i4vi2(arg1, arg2, arg3)
   vector(integer(2)) :: arg3
   call vec_xstw4(arg1, arg2, arg3)
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %arg1 : !fir.ref<i32>
-! CHECK-FIR: %[[arg3:.*]] = fir.convert %arg2 : (!fir.ref<!fir.vector<8:i16>>) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[vsrc:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[bcsrc:.*]] = vector.bitcast %[[vsrc]] : vector<8xi16> to vector<4xi32>
-! CHECK-FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref<!fir.array<?xi8>>) -> !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[cnvsrc:.*]] = fir.convert %[[bcsrc]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: fir.store %[[cnvsrc]] to %[[trg]] {alignment = 1 : i64} : !fir.ref<!fir.vector<4:i32>>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr<i32>
-! CHECK-LLVMIR: %[[arg3:.*]] = llvm.bitcast %arg2 : !llvm.ptr<vector<8xi16>> to !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3]][%[[arg2]]] : (!llvm.ptr<i8>, i32) -> !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[src:.*]] = llvm.bitcast %[[arg1]] : vector<8xi16> to vector<4xi32>
-! CHECK-LLVMIR: %[[trg:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr<i8> to !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: llvm.store %[[src]], %[[trg]]
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %0, align 16
-! CHECK: %[[arg2:.*]] = load i32, ptr %1, align 4
-! CHECK: %[[addr:.*]] = getelementptr i8, ptr %2, i32 %[[arg2]]
-! CHECK: %[[src:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32>
-! CHECK:  store <4 x i32> %[[src]], ptr %[[addr]], align 1
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %0, align 16
+! LLVMIR: %[[arg2:.*]] = load i32, ptr %1, align 4
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %2, i32 %[[arg2]]
+! LLVMIR: %[[src:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32>
+! LLVMIR: store <4 x i32> %[[src]], ptr %[[addr]], align 16
 end subroutine vec_xstw4_test_vi2i4vi2
 
 ! CHECK-LABEL: vec_xstw4_test_vi4i4vai4
@@ -1169,38 +576,15 @@ subroutine vec_xstw4_test_vi4i4vai4(arg1, arg2, arg3, i)
   integer(4) :: i
   call vec_xstw4(arg1, arg2, arg3(i))
 
-! CHECK-FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %arg1 : !fir.ref<i32>
-! CHECK-FIR: %[[idx:.*]] = fir.load %arg3 : !fir.ref<i32>
-! CHECK-FIR: %[[idx64:.*]] = fir.convert %[[idx]] : (i32) -> i64
-! CHECK-FIR: %[[one:.*]] = arith.constant 1 : i64
-! CHECK-FIR: %[[idx64m1:.*]] = arith.subi %[[idx64]], %[[one]] : i64
-! CHECK-FIR: %[[elem:.*]] = fir.coordinate_of %[[arg3:.*]], %[[idx64m1]] : (!fir.ref<!fir.array<20x!fir.vector<4:i32>>>, i64) -> !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[elemptr:.*]] = fir.convert %[[elem]] : (!fir.ref<!fir.vector<4:i32>>) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[elemptr]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
-! CHECK-FIR: %[[vsrc:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref<!fir.array<?xi8>>) -> !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[cnvsrc:.*]] = fir.convert %[[vsrc]] : (vector<4xi32>) -> !fir.vector<4:i32>
-! CHECK-FIR: fir.store %[[cnvsrc]] to %[[trg]]
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr<i32>
-! CHECK-LLVMIR: %[[idx:.*]] = llvm.load %arg3 : !llvm.ptr<i32>
-! CHECK-LLVMIR: %[[idx64:.*]] = llvm.sext %[[idx]] : i32 to i64
-! CHECK-LLVMIR: %[[one:.*]] = llvm.mlir.constant(1 : i64) : i64
-! CHECK-LLVMIR: %[[idx64m1:.*]] = llvm.sub %[[idx64]], %[[one]]  : i64
-! CHECK-LLVMIR: %[[elem:.*]] = llvm.getelementptr %[[arg3:.*]][0, %[[idx64m1]]] : (!llvm.ptr<array<20 x vector<4xi32>>>, i64) -> !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[elemptr:.*]] = llvm.bitcast %[[elem]] : !llvm.ptr<vector<4xi32>> to !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[elemptr]][%[[arg2]]] : (!llvm.ptr<i8>, i32) -> !llvm.ptr<i8>
-! CHECK-LLVMIR: %[[trg:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr<i8> to !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: llvm.store %[[arg1]], %[[trg]]
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %0, align 16
-! CHECK: %[[arg2:.*]] = load i32, ptr %1, align 4
-! CHECK: %[[idx:.*]] = load i32, ptr %3, align 4
-! CHECK: %[[idx64:.*]] = sext i32 %[[idx]] to i64
-! CHECK: %[[idx64m1:.*]] = sub i64 %[[idx64]], 1
-! CHECK: %[[elem:.*]] = getelementptr [20 x <4 x i32>], ptr %2, i32 0, i64 %[[idx64m1]]
-! CHECK: %[[trg:.*]] = getelementptr i8, ptr %[[elem]], i32 %[[arg2]]
-! CHECK: store <4 x i32> %[[arg1]], ptr %[[trg]], align 1
+! LLVMIR: %[[i:.*]] = load i32, ptr %3, align 4
+! LLVMIR: %[[iext:.*]] = sext i32 %[[i]] to i64
+! LLVMIR: %[[isub:.*]] = sub i64 %[[iext]], 1
+! LLVMIR: %[[imul1:.*]] = mul i64 %[[isub]], 1
+! LLVMIR: %[[imul2:.*]] = mul i64 %[[imul1]], 1
+! LLVMIR: %[[iadd:.*]] = add i64 %[[imul2]], 0
+! LLVMIR: %[[gep1:.*]] = getelementptr <4 x i32>, ptr %2, i64 %[[iadd]]
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %0, align 16
+! LLVMIR: %[[arg2:.*]] = load i32, ptr %1, align 4
+! LLVMIR: %[[gep2:.*]] = getelementptr i8, ptr %[[gep1]], i32 %[[arg2]]
+! LLVMIR: store <4 x i32> %[[arg1]], ptr %[[gep2]], align 16
 end subroutine vec_xstw4_test_vi4i4vai4

diff --git a/flang/test/Lower/PowerPC/ppc-vec_abs.f90 b/flang/test/Lower/PowerPC/ppc-vec_abs.f90
deleted file mode 100644
index d7fdd0431e76ea3..000000000000000
--- a/flang/test/Lower/PowerPC/ppc-vec_abs.f90
+++ /dev/null
@@ -1,131 +0,0 @@
-! RUN: bbc -emit-fir %s -o - | FileCheck --check-prefixes="CHECK-FIR" %s
-! RUN: %flang_fc1 -emit-fir %s -o - | fir-opt --fir-to-llvm-ir | FileCheck --check-prefixes="CHECK-LLVMIR" %s
-! RUN: %flang_fc1 -emit-llvm %s -o - | FileCheck --check-prefixes="CHECK" %s
-! REQUIRES: target=powerpc{{.*}}
-
-!----------------------
-! vec_abs
-!----------------------
-
-! CHECK-LABEL: vec_abs_i1
-subroutine vec_abs_i1(arg1)
-  vector(integer(1)) :: arg1, r
-  r = vec_abs(arg1)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[zero:.*]] = arith.constant 0 : i8
-! CHECK-FIR: %[[vzero:.*]] = vector.broadcast %[[zero]] : i8 to vector<16xi8>
-! CHECK-FIR: %[[sub:.*]] = arith.subi %[[vzero]], %[[varg1]] : vector<16xi8>
-! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vmaxsb(%[[sub]], %[[varg1]]) fastmath<contract> : (vector<16xi8>, vector<16xi8>) -> !fir.vector<16:i8>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %{{.*}} = llvm.mlir.constant(0 : i8) : i8
-! CHECK-LLVMIR: %[[vzero:.*]] = llvm.mlir.constant(dense<0> : vector<16xi8>) : vector<16xi8>
-! CHECK-LLVMIR: %[[sub:.*]] = llvm.sub %[[vzero]], %[[arg1]]  : vector<16xi8>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.call @llvm.ppc.altivec.vmaxsb(%[[sub]], %[[arg1]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<16xi8>, vector<16xi8>) -> vector<16xi8>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[sub:.*]] = sub <16 x i8> zeroinitializer, %[[arg1]]
-! CHECK: %{{[0-9]+}} = call <16 x i8> @llvm.ppc.altivec.vmaxsb(<16 x i8> %[[sub]], <16 x i8> %[[arg1]])
-end subroutine vec_abs_i1
-
-! CHECK-LABEL: vec_abs_i2
-subroutine vec_abs_i2(arg1)
-  vector(integer(2)) :: arg1, r
-  r = vec_abs(arg1)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[zero:.*]] = arith.constant 0 : i16
-! CHECK-FIR: %[[vzero:.*]] = vector.broadcast %[[zero]] : i16 to vector<8xi16>
-! CHECK-FIR: %[[sub:.*]] = arith.subi %[[vzero]], %[[varg1]] : vector<8xi16>
-! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vmaxsh(%[[sub]], %[[varg1]]) fastmath<contract> : (vector<8xi16>, vector<8xi16>) -> !fir.vector<8:i16>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %{{.*}} = llvm.mlir.constant(0 : i16) : i16
-! CHECK-LLVMIR: %[[vzero:.*]] = llvm.mlir.constant(dense<0> : vector<8xi16>) : vector<8xi16>
-! CHECK-LLVMIR: %[[sub:.*]] = llvm.sub %[[vzero]], %[[arg1]]  : vector<8xi16>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.call @llvm.ppc.altivec.vmaxsh(%[[sub]], %[[arg1]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<8xi16>, vector<8xi16>) -> vector<8xi16>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[sub:.*]] = sub <8 x i16> zeroinitializer, %[[arg1]]
-! CHECK: %{{[0-9]+}} = call <8 x i16> @llvm.ppc.altivec.vmaxsh(<8 x i16> %[[sub]], <8 x i16> %[[arg1]])
-end subroutine vec_abs_i2
-
-! CHECK-LABEL: vec_abs_i4
-subroutine vec_abs_i4(arg1)
-  vector(integer(4)) :: arg1, r
-  r = vec_abs(arg1)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[zero:.*]] = arith.constant 0 : i32
-! CHECK-FIR: %[[vzero:.*]] = vector.broadcast %[[zero]] : i32 to vector<4xi32>
-! CHECK-FIR: %[[sub:.*]] = arith.subi %[[vzero]], %[[varg1]] : vector<4xi32>
-! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vmaxsw(%[[sub]], %[[varg1]]) fastmath<contract> : (vector<4xi32>, vector<4xi32>) -> !fir.vector<4:i32>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %{{.*}} = llvm.mlir.constant(0 : i32) : i32
-! CHECK-LLVMIR: %[[vzero:.*]] = llvm.mlir.constant(dense<0> : vector<4xi32>) : vector<4xi32>
-! CHECK-LLVMIR: %[[sub:.*]] = llvm.sub %[[vzero]], %[[arg1]]  : vector<4xi32>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.call @llvm.ppc.altivec.vmaxsw(%[[sub]], %[[arg1]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[sub:.*]] = sub <4 x i32> zeroinitializer, %[[arg1]]
-! CHECK: %{{[0-9]+}} = call <4 x i32> @llvm.ppc.altivec.vmaxsw(<4 x i32> %[[sub]], <4 x i32> %[[arg1]])
-end subroutine vec_abs_i4
-
-! CHECK-LABEL: vec_abs_i8
-subroutine vec_abs_i8(arg1)
-  vector(integer(8)) :: arg1, r
-  r = vec_abs(arg1)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! CHECK-FIR: %[[zero:.*]] = arith.constant 0 : i64
-! CHECK-FIR: %[[vzero:.*]] = vector.broadcast %[[zero]] : i64 to vector<2xi64>
-! CHECK-FIR: %[[sub:.*]] = arith.subi %[[vzero]], %[[varg1]] : vector<2xi64>
-! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vmaxsd(%[[sub]], %[[varg1]]) fastmath<contract> : (vector<2xi64>, vector<2xi64>) -> !fir.vector<2:i64>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %{{.*}} = llvm.mlir.constant(0 : i64) : i64
-! CHECK-LLVMIR: %[[vzero:.*]] = llvm.mlir.constant(dense<0> : vector<2xi64>) : vector<2xi64>
-! CHECK-LLVMIR: %[[sub:.*]] = llvm.sub %[[vzero]], %[[arg1]]  : vector<2xi64>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.call @llvm.ppc.altivec.vmaxsd(%[[sub]], %[[arg1]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<2xi64>, vector<2xi64>) -> vector<2xi64>
-
-! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[sub:.*]] = sub <2 x i64> zeroinitializer, %[[arg1]]
-! CHECK: %{{[0-9]+}} = call <2 x i64> @llvm.ppc.altivec.vmaxsd(<2 x i64> %[[sub]], <2 x i64> %[[arg1]])
-end subroutine vec_abs_i8
-
-! CHECK-LABEL: vec_abs_r4
-subroutine vec_abs_r4(arg1)
-  vector(real(4)) :: arg1, r
-  r = vec_abs(arg1)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.fabs.v4f32(%[[arg1]]) fastmath<contract> : (!fir.vector<4:f32>) -> !fir.vector<4:f32>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.call @llvm.fabs.v4f32(%[[arg1]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xf32>) -> vector<4xf32>
-
-! CHECK: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! CHECK: %{{[0-9]+}} = call contract <4 x float> @llvm.fabs.v4f32(<4 x float> %[[arg1]])
-end subroutine vec_abs_r4
-
-! CHECK-LABEL: vec_abs_r8
-subroutine vec_abs_r8(arg1)
-  vector(real(8)) :: arg1, r
-  r = vec_abs(arg1)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.fabs.v2f64(%[[arg1]]) fastmath<contract> : (!fir.vector<2:f64>) -> !fir.vector<2:f64>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.call @llvm.fabs.v2f64(%[[arg1]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<2xf64>) -> vector<2xf64>
-
-! CHECK: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16
-! CHECK: %{{[0-9]+}} = call contract <2 x double> @llvm.fabs.v2f64(<2 x double> %[[arg1]])
-end subroutine vec_abs_r8
-

diff --git a/flang/test/Lower/PowerPC/ppc-vec_add-and-mul-sub-xor.f90 b/flang/test/Lower/PowerPC/ppc-vec_add-and-mul-sub-xor.f90
deleted file mode 100644
index d6ce91e067a8bf5..000000000000000
--- a/flang/test/Lower/PowerPC/ppc-vec_add-and-mul-sub-xor.f90
+++ /dev/null
@@ -1,1055 +0,0 @@
-! RUN: bbc -emit-fir %s -o - | FileCheck --check-prefixes="CHECK-FIR" %s
-! RUN: %flang_fc1 -emit-fir %s -o - | fir-opt --fir-to-llvm-ir | FileCheck --check-prefixes="CHECK-LLVMIR" %s
-! RUN: %flang_fc1 -emit-llvm %s -o - | FileCheck --check-prefixes="CHECK" %s
-! REQUIRES: target=powerpc{{.*}}
-
-! vec_add
-
-! CHECK-LABEL: vec_add_testf32
-subroutine vec_add_testf32(x, y)
-  vector(real(4)) :: vsum, x, y
-  vsum = vec_add(x, y)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[vy:.*]] = fir.convert %[[y]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[vsum:.*]] = arith.addf %[[vx]], %[[vy]] fastmath<contract> : vector<4xf32>
-! CHECK-FIR: %{{[0-9]}} = fir.convert %[[vsum]] : (vector<4xf32>) -> !fir.vector<4:f32>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %{{[0-9]}} = llvm.fadd %[[x]], %[[y]]  {fastmathFlags = #llvm.fastmath<contract>} : vector<4xf32>
-
-! CHECK: %[[x:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[y:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
-! CHECK: %{{[0-9]}} = fadd contract <4 x float> %[[x]], %[[y]]
-end subroutine vec_add_testf32
-
-! CHECK-LABEL: vec_add_testf64
-subroutine vec_add_testf64(x, y)
-  vector(real(8)) :: vsum, x, y
-  vsum = vec_add(x, y)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! CHECK-FIR: %[[vy:.*]] = fir.convert %[[y]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! CHECK-FIR: %[[vsum:.*]] = arith.addf %[[vx]], %[[vy]] fastmath<contract> : vector<2xf64>
-! CHECK-FIR: %{{[0-9]}} = fir.convert %[[vsum]] : (vector<2xf64>) -> !fir.vector<2:f64>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %{{[0-9]}} = llvm.fadd %[[x]], %[[y]]  {fastmathFlags = #llvm.fastmath<contract>} : vector<2xf64>
-
-! CHECK: %[[x:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[y:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
-! CHECK: %{{[0-9]}} = fadd contract <2 x double> %[[x]], %[[y]]
-end subroutine vec_add_testf64
-
-! CHECK-LABEL: vec_add_testi8
-subroutine vec_add_testi8(x, y)
-  vector(integer(1)) :: vsum, x, y
-  vsum = vec_add(x, y)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[vy:.*]] = fir.convert %[[y]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[vsum:.*]] = arith.addi %[[vx]], %[[vy]] : vector<16xi8>
-! CHECK-FIR: %{{[0-9]}} = fir.convert %[[vsum]] : (vector<16xi8>) -> !fir.vector<16:i8>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %{{[0-9]}} = llvm.add %[[x]], %[[y]] : vector<16xi8>
-
-! CHECK: %[[x:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[y:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
-! CHECK: %{{[0-9]}} = add <16 x i8> %[[x]], %[[y]]
-end subroutine vec_add_testi8
-
-! CHECK-LABEL: vec_add_testi16
-subroutine vec_add_testi16(x, y)
-  vector(integer(2)) :: vsum, x, y
-  vsum = vec_add(x, y)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[vy:.*]] = fir.convert %[[y]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[vsum:.*]] = arith.addi %[[vx]], %[[vy]] : vector<8xi16>
-! CHECK-FIR: %{{[0-9]}} = fir.convert %[[vsum]] : (vector<8xi16>) -> !fir.vector<8:i16>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %{{[0-9]}} = llvm.add %[[x]], %[[y]] : vector<8xi16>
-
-! CHECK: %[[x:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[y:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
-! CHECK: %{{[0-9]}} = add <8 x i16> %[[x]], %[[y]]
-end subroutine vec_add_testi16
-
-! CHECK-LABEL: vec_add_testi32
-subroutine vec_add_testi32(x, y)
-  vector(integer(4)) :: vsum, x, y
-  vsum = vec_add(x, y)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[vy:.*]] = fir.convert %[[y]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[vsum:.*]] = arith.addi %[[vx]], %[[vy]] : vector<4xi32>
-! CHECK-FIR: %{{[0-9]}} = fir.convert %[[vsum]] : (vector<4xi32>) -> !fir.vector<4:i32>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %{{[0-9]}} = llvm.add %[[x]], %[[y]] : vector<4xi32>
-
-! CHECK: %[[x:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[y:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
-! CHECK: %{{[0-9]}} = add <4 x i32> %[[x]], %[[y]]
-end subroutine vec_add_testi32
-
-! CHECK-LABEL: vec_add_testi64
-subroutine vec_add_testi64(x, y)
-  vector(integer(8)) :: vsum, x, y
-  vsum = vec_add(x, y)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! CHECK-FIR: %[[vy:.*]] = fir.convert %[[y]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! CHECK-FIR: %[[vsum:.*]] = arith.addi %[[vx]], %[[vy]] : vector<2xi64>
-! CHECK-FIR: %{{[0-9]}} = fir.convert %[[vsum]] : (vector<2xi64>) -> !fir.vector<2:i64>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %{{[0-9]}} = llvm.add %[[x]], %[[y]] : vector<2xi64>
-
-! CHECK: %[[x:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[y:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
-! CHECK: %{{[0-9]}} = add <2 x i64> %[[x]], %[[y]]
-end subroutine vec_add_testi64
-
-! CHECK-LABEL: vec_add_testui8
-subroutine vec_add_testui8(x, y)
-  vector(unsigned(1)) :: vsum, x, y
-  vsum = vec_add(x, y)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[vy:.*]] = fir.convert %[[y]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[vsum:.*]] = arith.addi %[[vx]], %[[vy]] : vector<16xi8>
-! CHECK-FIR: %{{[0-9]}} = fir.convert %[[vsum]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %{{[0-9]}} = llvm.add %[[x]], %[[y]] : vector<16xi8>
-
-! CHECK: %[[x:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[y:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
-! CHECK: %{{[0-9]}} = add <16 x i8> %[[x]], %[[y]]
-end subroutine vec_add_testui8
-
-! CHECK-LABEL: vec_add_testui16
-subroutine vec_add_testui16(x, y)
-  vector(unsigned(2)) :: vsum, x, y
-  vsum = vec_add(x, y)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[vy:.*]] = fir.convert %[[y]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[vsum:.*]] = arith.addi %[[vx]], %[[vy]] : vector<8xi16>
-! CHECK-FIR: %{{[0-9]}} = fir.convert %[[vsum]] : (vector<8xi16>) -> !fir.vector<8:ui16>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %{{[0-9]}} = llvm.add %[[x]], %[[y]] : vector<8xi16>
-
-! CHECK: %[[x:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[y:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
-! CHECK: %{{[0-9]}} = add <8 x i16> %[[x]], %[[y]]
-end subroutine vec_add_testui16
-
-! CHECK-LABEL: vec_add_testui32
-subroutine vec_add_testui32(x, y)
-  vector(unsigned(4)) :: vsum, x, y
-  vsum = vec_add(x, y)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[vy:.*]] = fir.convert %[[y]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[vsum:.*]] = arith.addi %[[vx]], %[[vy]] : vector<4xi32>
-! CHECK-FIR: %{{[0-9]}} = fir.convert %[[vsum]] : (vector<4xi32>) -> !fir.vector<4:ui32>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %{{[0-9]}} = llvm.add %[[x]], %[[y]] : vector<4xi32>
-
-! CHECK: %[[x:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[y:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
-! CHECK: %{{[0-9]}} = add <4 x i32> %[[x]], %[[y]]
-end subroutine vec_add_testui32
-
-! CHECK-LABEL: vec_add_testui64
-subroutine vec_add_testui64(x, y)
-  vector(unsigned(8)) :: vsum, x, y
-  vsum = vec_add(x, y)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %[[vy:.*]] = fir.convert %[[y]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %[[vsum:.*]] = arith.addi %[[vx]], %[[vy]] : vector<2xi64>
-! CHECK-FIR: %{{[0-9]}} = fir.convert %[[vsum]] : (vector<2xi64>) -> !fir.vector<2:ui64>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %{{[0-9]}} = llvm.add %[[x]], %[[y]] : vector<2xi64>
-
-! CHECK: %[[x:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[y:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
-! CHECK: %{{[0-9]}} = add <2 x i64> %[[x]], %[[y]]
-end subroutine vec_add_testui64
-
-! vec_mul
-
-! CHECK-LABEL: vec_mul_testf32
-subroutine vec_mul_testf32(x, y)
-  vector(real(4)) :: vmul, x, y
-  vmul = vec_mul(x, y)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[vy:.*]] = fir.convert %[[y]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[vmul:.*]] = arith.mulf %[[vx]], %[[vy]] fastmath<contract> : vector<4xf32>
-! CHECK-FIR: %{{[0-9]}} = fir.convert %[[vmul]] : (vector<4xf32>) -> !fir.vector<4:f32>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %{{[0-9]}} = llvm.fmul %[[x]], %[[y]]  {fastmathFlags = #llvm.fastmath<contract>} : vector<4xf32>
-
-! CHECK: %[[x:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[y:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
-! CHECK: %{{[0-9]}} = fmul contract <4 x float> %[[x]], %[[y]]
-end subroutine vec_mul_testf32
-
-! CHECK-LABEL: vec_mul_testf64
-subroutine vec_mul_testf64(x, y)
-  vector(real(8)) :: vmul, x, y
-  vmul = vec_mul(x, y)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! CHECK-FIR: %[[vy:.*]] = fir.convert %[[y]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! CHECK-FIR: %[[vmul:.*]] = arith.mulf %[[vx]], %[[vy]] fastmath<contract> : vector<2xf64>
-! CHECK-FIR: %{{[0-9]}} = fir.convert %[[vmul]] : (vector<2xf64>) -> !fir.vector<2:f64>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %{{[0-9]}} = llvm.fmul %[[x]], %[[y]]  {fastmathFlags = #llvm.fastmath<contract>} : vector<2xf64>
-
-! CHECK: %[[x:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[y:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
-! CHECK: %{{[0-9]}} = fmul contract <2 x double> %[[x]], %[[y]]
-end subroutine vec_mul_testf64
-
-! CHECK-LABEL: vec_mul_testi8
-subroutine vec_mul_testi8(x, y)
-  vector(integer(1)) :: vmul, x, y
-  vmul = vec_mul(x, y)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[vy:.*]] = fir.convert %[[y]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[vmul:.*]] = arith.muli %[[vx]], %[[vy]] : vector<16xi8>
-! CHECK-FIR: %{{[0-9]}} = fir.convert %[[vmul]] : (vector<16xi8>) -> !fir.vector<16:i8>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %{{[0-9]}} = llvm.mul %[[x]], %[[y]] : vector<16xi8>
-
-! CHECK: %[[x:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[y:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
-! CHECK: %{{[0-9]}} = mul <16 x i8> %[[x]], %[[y]]
-end subroutine vec_mul_testi8
-
-! CHECK-LABEL: vec_mul_testi16
-subroutine vec_mul_testi16(x, y)
-  vector(integer(2)) :: vmul, x, y
-  vmul = vec_mul(x, y)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[vy:.*]] = fir.convert %[[y]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[vmul:.*]] = arith.muli %[[vx]], %[[vy]] : vector<8xi16>
-! CHECK-FIR: %{{[0-9]}} = fir.convert %[[vmul]] : (vector<8xi16>) -> !fir.vector<8:i16>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %{{[0-9]}} = llvm.mul %[[x]], %[[y]] : vector<8xi16>
-
-! CHECK: %[[x:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[y:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
-! CHECK: %{{[0-9]}} = mul <8 x i16> %[[x]], %[[y]]
-end subroutine vec_mul_testi16
-
-! CHECK-LABEL: vec_mul_testi32
-subroutine vec_mul_testi32(x, y)
-  vector(integer(4)) :: vmul, x, y
-  vmul = vec_mul(x, y)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[vy:.*]] = fir.convert %[[y]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[vmul:.*]] = arith.muli %[[vx]], %[[vy]] : vector<4xi32>
-! CHECK-FIR: %{{[0-9]}} = fir.convert %[[vmul]] : (vector<4xi32>) -> !fir.vector<4:i32>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %{{[0-9]}} = llvm.mul %[[x]], %[[y]] : vector<4xi32>
-
-! CHECK: %[[x:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[y:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
-! CHECK: %{{[0-9]}} = mul <4 x i32> %[[x]], %[[y]]
-end subroutine vec_mul_testi32
-
-! CHECK-LABEL: vec_mul_testi64
-subroutine vec_mul_testi64(x, y)
-  vector(integer(8)) :: vmul, x, y
-  vmul = vec_mul(x, y)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! CHECK-FIR: %[[vy:.*]] = fir.convert %[[y]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! CHECK-FIR: %[[vmul:.*]] = arith.muli %[[vx]], %[[vy]] : vector<2xi64>
-! CHECK-FIR: %{{[0-9]}} = fir.convert %[[vmul]] : (vector<2xi64>) -> !fir.vector<2:i64>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %{{[0-9]}} = llvm.mul %[[x]], %[[y]] : vector<2xi64>
-
-! CHECK: %[[x:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[y:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
-! CHECK: %{{[0-9]}} = mul <2 x i64> %[[x]], %[[y]]
-end subroutine vec_mul_testi64
-
-! CHECK-LABEL: vec_mul_testui8
-subroutine vec_mul_testui8(x, y)
-  vector(unsigned(1)) :: vmul, x, y
-  vmul = vec_mul(x, y)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[vy:.*]] = fir.convert %[[y]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[vmul:.*]] = arith.muli %[[vx]], %[[vy]] : vector<16xi8>
-! CHECK-FIR: %{{[0-9]}} = fir.convert %[[vmul]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %{{[0-9]}} = llvm.mul %[[x]], %[[y]] : vector<16xi8>
-
-! CHECK: %[[x:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[y:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
-! CHECK: %{{[0-9]}} = mul <16 x i8> %[[x]], %[[y]]
-end subroutine vec_mul_testui8
-
-! CHECK-LABEL: vec_mul_testui16
-subroutine vec_mul_testui16(x, y)
-  vector(unsigned(2)) :: vmul, x, y
-  vmul = vec_mul(x, y)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[vy:.*]] = fir.convert %[[y]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[vmul:.*]] = arith.muli %[[vx]], %[[vy]] : vector<8xi16>
-! CHECK-FIR: %{{[0-9]}} = fir.convert %[[vmul]] : (vector<8xi16>) -> !fir.vector<8:ui16>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %{{[0-9]}} = llvm.mul %[[x]], %[[y]] : vector<8xi16>
-
-! CHECK: %[[x:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[y:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
-! CHECK: %{{[0-9]}} = mul <8 x i16> %[[x]], %[[y]]
-end subroutine vec_mul_testui16
-
-! CHECK-LABEL: vec_mul_testui32
-subroutine vec_mul_testui32(x, y)
-  vector(unsigned(4)) :: vmul, x, y
-  vmul = vec_mul(x, y)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[vy:.*]] = fir.convert %[[y]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[vmul:.*]] = arith.muli %[[vx]], %[[vy]] : vector<4xi32>
-! CHECK-FIR: %{{[0-9]}} = fir.convert %[[vmul]] : (vector<4xi32>) -> !fir.vector<4:ui32>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %{{[0-9]}} = llvm.mul %[[x]], %[[y]] : vector<4xi32>
-
-! CHECK: %[[x:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[y:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
-! CHECK: %{{[0-9]}} = mul <4 x i32> %[[x]], %[[y]]
-end subroutine vec_mul_testui32
-
-! CHECK-LABEL: vec_mul_testui64
-subroutine vec_mul_testui64(x, y)
-  vector(unsigned(8)) :: vmul, x, y
-  vmul = vec_mul(x, y)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %[[vy:.*]] = fir.convert %[[y]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %[[vmul:.*]] = arith.muli %[[vx]], %[[vy]] : vector<2xi64>
-! CHECK-FIR: %{{[0-9]}} = fir.convert %[[vmul]] : (vector<2xi64>) -> !fir.vector<2:ui64>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %{{[0-9]}} = llvm.mul %[[x]], %[[y]] : vector<2xi64>
-
-! CHECK: %[[x:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[y:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
-! CHECK: %{{[0-9]}} = mul <2 x i64> %[[x]], %[[y]]
-end subroutine vec_mul_testui64
-
-! vec_sub
-
-! CHECK-LABEL: vec_sub_testf32
-subroutine vec_sub_testf32(x, y)
-  vector(real(4)) :: vsub, x, y
-  vsub = vec_sub(x, y)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[vy:.*]] = fir.convert %[[y]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[vsub:.*]] = arith.subf %[[vx]], %[[vy]] fastmath<contract> : vector<4xf32>
-! CHECK-FIR: %{{[0-9]}} = fir.convert %[[vsub]] : (vector<4xf32>) -> !fir.vector<4:f32>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %{{[0-9]}} = llvm.fsub %[[x]], %[[y]]  {fastmathFlags = #llvm.fastmath<contract>} : vector<4xf32>
-
-! CHECK: %[[x:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[y:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
-! CHECK: %{{[0-9]}} = fsub contract <4 x float> %[[x]], %[[y]]
-end subroutine vec_sub_testf32
-
-! CHECK-LABEL: vec_sub_testf64
-subroutine vec_sub_testf64(x, y)
-  vector(real(8)) :: vsub, x, y
-  vsub = vec_sub(x, y)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! CHECK-FIR: %[[vy:.*]] = fir.convert %[[y]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! CHECK-FIR: %[[vsub:.*]] = arith.subf %[[vx]], %[[vy]] fastmath<contract> : vector<2xf64>
-! CHECK-FIR: %{{[0-9]}} = fir.convert %[[vsub]] : (vector<2xf64>) -> !fir.vector<2:f64>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %{{[0-9]}} = llvm.fsub %[[x]], %[[y]]  {fastmathFlags = #llvm.fastmath<contract>} : vector<2xf64>
-
-! CHECK: %[[x:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[y:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
-! CHECK: %{{[0-9]}} = fsub contract <2 x double> %[[x]], %[[y]]
-end subroutine vec_sub_testf64
-
-! CHECK-LABEL: vec_sub_testi8
-subroutine vec_sub_testi8(x, y)
-  vector(integer(1)) :: vsub, x, y
-  vsub = vec_sub(x, y)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[vy:.*]] = fir.convert %[[y]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[vsub:.*]] = arith.subi %[[vx]], %[[vy]] : vector<16xi8>
-! CHECK-FIR: %{{[0-9]}} = fir.convert %[[vsub]] : (vector<16xi8>) -> !fir.vector<16:i8>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %{{[0-9]}} = llvm.sub %[[x]], %[[y]] : vector<16xi8>
-
-! CHECK: %[[x:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[y:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
-! CHECK: %{{[0-9]}} = sub <16 x i8> %[[x]], %[[y]]
-end subroutine vec_sub_testi8
-
-! CHECK-LABEL: vec_sub_testi16
-subroutine vec_sub_testi16(x, y)
-  vector(integer(2)) :: vsub, x, y
-  vsub = vec_sub(x, y)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[vy:.*]] = fir.convert %[[y]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[vsub:.*]] = arith.subi %[[vx]], %[[vy]] : vector<8xi16>
-! CHECK-FIR: %{{[0-9]}} = fir.convert %[[vsub]] : (vector<8xi16>) -> !fir.vector<8:i16>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %{{[0-9]}} = llvm.sub %[[x]], %[[y]] : vector<8xi16>
-
-! CHECK: %[[x:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[y:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
-! CHECK: %{{[0-9]}} = sub <8 x i16> %[[x]], %[[y]]
-end subroutine vec_sub_testi16
-
-! CHECK-LABEL: vec_sub_testi32
-subroutine vec_sub_testi32(x, y)
-  vector(integer(4)) :: vsub, x, y
-  vsub = vec_sub(x, y)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[vy:.*]] = fir.convert %[[y]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[vsub:.*]] = arith.subi %[[vx]], %[[vy]] : vector<4xi32>
-! CHECK-FIR: %{{[0-9]}} = fir.convert %[[vsub]] : (vector<4xi32>) -> !fir.vector<4:i32>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %{{[0-9]}} = llvm.sub %[[x]], %[[y]] : vector<4xi32>
-
-! CHECK: %[[x:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[y:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
-! CHECK: %{{[0-9]}} = sub <4 x i32> %[[x]], %[[y]]
-end subroutine vec_sub_testi32
-
-! CHECK-LABEL: vec_sub_testi64
-subroutine vec_sub_testi64(x, y)
-  vector(integer(8)) :: vsub, x, y
-  vsub = vec_sub(x, y)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! CHECK-FIR: %[[vy:.*]] = fir.convert %[[y]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! CHECK-FIR: %[[vsub:.*]] = arith.subi %[[vx]], %[[vy]] : vector<2xi64>
-! CHECK-FIR: %{{[0-9]}} = fir.convert %[[vsub]] : (vector<2xi64>) -> !fir.vector<2:i64>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %{{[0-9]}} = llvm.sub %[[x]], %[[y]] : vector<2xi64>
-
-! CHECK: %[[x:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[y:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
-! CHECK: %{{[0-9]}} = sub <2 x i64> %[[x]], %[[y]]
-end subroutine vec_sub_testi64
-
-! CHECK-LABEL: vec_sub_testui8
-subroutine vec_sub_testui8(x, y)
-  vector(unsigned(1)) :: vsub, x, y
-  vsub = vec_sub(x, y)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[vy:.*]] = fir.convert %[[y]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[vsub:.*]] = arith.subi %[[vx]], %[[vy]] : vector<16xi8>
-! CHECK-FIR: %{{[0-9]}} = fir.convert %[[vsub]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %{{[0-9]}} = llvm.sub %[[x]], %[[y]] : vector<16xi8>
-
-! CHECK: %[[x:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[y:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
-! CHECK: %{{[0-9]}} = sub <16 x i8> %[[x]], %[[y]]
-end subroutine vec_sub_testui8
-
-! CHECK-LABEL: vec_sub_testui16
-subroutine vec_sub_testui16(x, y)
-  vector(unsigned(2)) :: vsub, x, y
-  vsub = vec_sub(x, y)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[vy:.*]] = fir.convert %[[y]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[vsub:.*]] = arith.subi %[[vx]], %[[vy]] : vector<8xi16>
-! CHECK-FIR: %{{[0-9]}} = fir.convert %[[vsub]] : (vector<8xi16>) -> !fir.vector<8:ui16>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %{{[0-9]}} = llvm.sub %[[x]], %[[y]] : vector<8xi16>
-
-! CHECK: %[[x:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[y:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
-! CHECK: %{{[0-9]}} = sub <8 x i16> %[[x]], %[[y]]
-end subroutine vec_sub_testui16
-
-! CHECK-LABEL: vec_sub_testui32
-subroutine vec_sub_testui32(x, y)
-  vector(unsigned(4)) :: vsub, x, y
-  vsub = vec_sub(x, y)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[vy:.*]] = fir.convert %[[y]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[vsub:.*]] = arith.subi %[[vx]], %[[vy]] : vector<4xi32>
-! CHECK-FIR: %{{[0-9]}} = fir.convert %[[vsub]] : (vector<4xi32>) -> !fir.vector<4:ui32>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %{{[0-9]}} = llvm.sub %[[x]], %[[y]] : vector<4xi32>
-
-! CHECK: %[[x:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[y:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
-! CHECK: %{{[0-9]}} = sub <4 x i32> %[[x]], %[[y]]
-end subroutine vec_sub_testui32
-
-! CHECK-LABEL: vec_sub_testui64
-subroutine vec_sub_testui64(x, y)
-  vector(unsigned(8)) :: vsub, x, y
-  vsub = vec_sub(x, y)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %[[vy:.*]] = fir.convert %[[y]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %[[vsub:.*]] = arith.subi %[[vx]], %[[vy]] : vector<2xi64>
-! CHECK-FIR: %{{[0-9]}} = fir.convert %[[vsub]] : (vector<2xi64>) -> !fir.vector<2:ui64>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %{{[0-9]}} = llvm.sub %[[x]], %[[y]] : vector<2xi64>
-
-! CHECK: %[[x:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[y:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
-! CHECK: %{{[0-9]}} = sub <2 x i64> %[[x]], %[[y]]
-end subroutine vec_sub_testui64
-
-!----------------------
-! vec_and
-!----------------------
-
-! CHECK-LABEL: vec_and_test_i8
-subroutine vec_and_test_i8(arg1, arg2)
-  vector(integer(1)) :: r, arg1, arg2
-  r = vec_and(arg1, arg2)
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = arith.andi %[[varg1]], %[[varg2]] : vector<16xi8>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[r]] : (vector<16xi8>) -> !fir.vector<16:i8>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.and %[[arg1]], %[[arg2]] : vector<16xi8>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %{{[0-9]+}} = and <16 x i8> %[[arg1]], %[[arg2]]
-end subroutine vec_and_test_i8
-
-! CHECK-LABEL: vec_and_test_i16
-subroutine vec_and_test_i16(arg1, arg2)
-  vector(integer(2)) :: r, arg1, arg2
-  r = vec_and(arg1, arg2)
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[r:.*]] = arith.andi %[[varg1]], %[[varg2]] : vector<8xi16>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[r]] : (vector<8xi16>) -> !fir.vector<8:i16>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.and %[[arg1]], %[[arg2]] : vector<8xi16>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %{{[0-9]+}} = and <8 x i16> %[[arg1]], %[[arg2]]
-end subroutine vec_and_test_i16
-
-! CHECK-LABEL: vec_and_test_i32
-subroutine vec_and_test_i32(arg1, arg2)
-  vector(integer(4)) :: r, arg1, arg2
-  r = vec_and(arg1, arg2)
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[r:.*]] = arith.andi %[[varg1]], %[[varg2]] : vector<4xi32>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[r]] : (vector<4xi32>) -> !fir.vector<4:i32>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.and %[[arg1]], %[[arg2]] : vector<4xi32>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %{{[0-9]+}} = and <4 x i32> %[[arg1]], %[[arg2]]
-end subroutine vec_and_test_i32
-
-! CHECK-LABEL: vec_and_test_i64
-subroutine vec_and_test_i64(arg1, arg2)
-  vector(integer(8)) :: r, arg1, arg2
-  r = vec_and(arg1, arg2)
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! CHECK-FIR: %[[r:.*]] = arith.andi %[[varg1]], %[[varg2]] : vector<2xi64>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[r]] : (vector<2xi64>) -> !fir.vector<2:i64>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.and %[[arg1]], %[[arg2]] : vector<2xi64>
-
-! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %{{[0-9]+}} = and <2 x i64> %[[arg1]], %[[arg2]]
-end subroutine vec_and_test_i64
-
-! CHECK-LABEL: vec_and_test_u8
-subroutine vec_and_test_u8(arg1, arg2)
-  vector(unsigned(1)) :: r, arg1, arg2
-  r = vec_and(arg1, arg2)
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = arith.andi %[[varg1]], %[[varg2]] : vector<16xi8>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[r]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.and %[[arg1]], %[[arg2]] : vector<16xi8>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %{{[0-9]+}} = and <16 x i8> %[[arg1]], %[[arg2]]
-end subroutine vec_and_test_u8
-
-! CHECK-LABEL: vec_and_test_u16
-subroutine vec_and_test_u16(arg1, arg2)
-  vector(unsigned(2)) :: r, arg1, arg2
-  r = vec_and(arg1, arg2)
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[r:.*]] = arith.andi %[[varg1]], %[[varg2]] : vector<8xi16>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[r]] : (vector<8xi16>) -> !fir.vector<8:ui16>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.and %[[arg1]], %[[arg2]] : vector<8xi16>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %{{[0-9]+}} = and <8 x i16> %[[arg1]], %[[arg2]]
-end subroutine vec_and_test_u16
-
-! CHECK-LABEL: vec_and_test_u32
-subroutine vec_and_test_u32(arg1, arg2)
-  vector(unsigned(4)) :: r, arg1, arg2
-  r = vec_and(arg1, arg2)
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[r:.*]] = arith.andi %[[varg1]], %[[varg2]] : vector<4xi32>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[r]] : (vector<4xi32>) -> !fir.vector<4:ui32>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.and %[[arg1]], %[[arg2]] : vector<4xi32>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %{{[0-9]+}} = and <4 x i32> %[[arg1]], %[[arg2]]
-end subroutine vec_and_test_u32
-
-! CHECK-LABEL: vec_and_test_u64
-subroutine vec_and_test_u64(arg1, arg2)
-  vector(unsigned(8)) :: r, arg1, arg2
-  r = vec_and(arg1, arg2)
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %[[r:.*]] = arith.andi %[[varg1]], %[[varg2]] : vector<2xi64>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[r]] : (vector<2xi64>) -> !fir.vector<2:ui64>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.and %[[arg1]], %[[arg2]] : vector<2xi64>
-
-! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %{{[0-9]+}} = and <2 x i64> %[[arg1]], %[[arg2]]
-end subroutine vec_and_test_u64
-
-! CHECK-LABEL: vec_and_testf32
-subroutine vec_and_testf32(arg1, arg2)
-  vector(real(4)) :: r, arg1, arg2
-  r = vec_and(arg1, arg2)
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[bc1:.*]] = vector.bitcast %[[varg1]] : vector<4xf32> to vector<4xi32>
-! CHECK-FIR: %[[bc2:.*]] = vector.bitcast %[[varg2]] : vector<4xf32> to vector<4xi32>
-! CHECK-FIR: %[[r:.*]] = arith.andi %[[bc1]], %[[bc2]] : vector<4xi32>
-! CHECK-FIR: %[[vr:.*]] = vector.bitcast %[[r]] : vector<4xi32> to vector<4xf32>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[vr]] : (vector<4xf32>) -> !fir.vector<4:f32>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[bc1:.*]] = llvm.bitcast %[[arg1]] : vector<4xf32> to vector<4xi32>
-! CHECK-LLVMIR: %[[bc2:.*]] = llvm.bitcast %[[arg2]] : vector<4xf32> to vector<4xi32>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.and %[[bc1]], %[[bc2]]  : vector<4xi32>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[r]] : vector<4xi32> to vector<4xf32>
-
-! CHECK: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! CHECK: %[[bc1:.*]] = bitcast <4 x float> %[[arg1]] to <4 x i32>
-! CHECK: %[[bc2:.*]] = bitcast <4 x float> %[[arg2]] to <4 x i32>
-! CHECK: %[[r:.*]] = and <4 x i32> %[[bc1]], %[[bc2]]
-! CHECK: %{{[0-9]+}} = bitcast <4 x i32> %[[r]] to <4 x float>
-end subroutine vec_and_testf32
-
-! CHECK-LABEL: vec_and_testf64
-subroutine vec_and_testf64(arg1, arg2)
-  vector(real(8)) :: r, arg1, arg2
-  r = vec_and(arg1, arg2)
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! CHECK-FIR: %[[bc1:.*]] = vector.bitcast %[[varg1]] : vector<2xf64> to vector<2xi64>
-! CHECK-FIR: %[[bc2:.*]] = vector.bitcast %[[varg2]] : vector<2xf64> to vector<2xi64>
-! CHECK-FIR: %[[r:.*]] = arith.andi %[[bc1]], %[[bc2]] : vector<2xi64>
-! CHECK-FIR: %[[vr:.*]] = vector.bitcast %[[r]] : vector<2xi64> to vector<2xf64>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[vr]] : (vector<2xf64>) -> !fir.vector<2:f64>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %[[bc1:.*]] = llvm.bitcast %[[arg1]] : vector<2xf64> to vector<2xi64>
-! CHECK-LLVMIR: %[[bc2:.*]] = llvm.bitcast %[[arg2]] : vector<2xf64> to vector<2xi64>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.and %[[bc1]], %[[bc2]]  : vector<2xi64>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[r]] : vector<2xi64> to vector<2xf64>
-
-! CHECK: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16
-! CHECK: %[[bc1:.*]] = bitcast <2 x double> %[[arg1]] to <2 x i64>
-! CHECK: %[[bc2:.*]] = bitcast <2 x double> %[[arg2]] to <2 x i64>
-! CHECK: %[[r:.*]] = and <2 x i64> %[[bc1]], %[[bc2]]
-! CHECK: %{{[0-9]+}} = bitcast <2 x i64> %[[r]] to <2 x double>
-end subroutine vec_and_testf64
-
-!----------------------
-! vec_xor
-!----------------------
-
-! CHECK-LABEL: vec_xor_test_i8
-subroutine vec_xor_test_i8(arg1, arg2)
-  vector(integer(1)) :: r, arg1, arg2
-  r = vec_xor(arg1, arg2)
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = arith.xori %[[varg1]], %[[varg2]] : vector<16xi8>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[r]] : (vector<16xi8>) -> !fir.vector<16:i8>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.xor %[[arg1]], %[[arg2]] : vector<16xi8>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %{{[0-9]+}} = xor <16 x i8> %[[arg1]], %[[arg2]]
-end subroutine vec_xor_test_i8
-
-! CHECK-LABEL: vec_xor_test_i16
-subroutine vec_xor_test_i16(arg1, arg2)
-  vector(integer(2)) :: r, arg1, arg2
-  r = vec_xor(arg1, arg2)
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[r:.*]] = arith.xori %[[varg1]], %[[varg2]] : vector<8xi16>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[r]] : (vector<8xi16>) -> !fir.vector<8:i16>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.xor %[[arg1]], %[[arg2]] : vector<8xi16>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %{{[0-9]+}} = xor <8 x i16> %[[arg1]], %[[arg2]]
-end subroutine vec_xor_test_i16
-
-! CHECK-LABEL: vec_xor_test_i32
-subroutine vec_xor_test_i32(arg1, arg2)
-  vector(integer(4)) :: r, arg1, arg2
-  r = vec_xor(arg1, arg2)
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[r:.*]] = arith.xori %[[varg1]], %[[varg2]] : vector<4xi32>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[r]] : (vector<4xi32>) -> !fir.vector<4:i32>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.xor %[[arg1]], %[[arg2]] : vector<4xi32>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %{{[0-9]+}} = xor <4 x i32> %[[arg1]], %[[arg2]]
-end subroutine vec_xor_test_i32
-
-! CHECK-LABEL: vec_xor_test_i64
-subroutine vec_xor_test_i64(arg1, arg2)
-  vector(integer(8)) :: r, arg1, arg2
-  r = vec_xor(arg1, arg2)
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! CHECK-FIR: %[[r:.*]] = arith.xori %[[varg1]], %[[varg2]] : vector<2xi64>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[r]] : (vector<2xi64>) -> !fir.vector<2:i64>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.xor %[[arg1]], %[[arg2]] : vector<2xi64>
-
-! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %{{[0-9]+}} = xor <2 x i64> %[[arg1]], %[[arg2]]
-end subroutine vec_xor_test_i64
-
-! CHECK-LABEL: vec_xor_test_u8
-subroutine vec_xor_test_u8(arg1, arg2)
-  vector(unsigned(1)) :: r, arg1, arg2
-  r = vec_xor(arg1, arg2)
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[r:.*]] = arith.xori %[[varg1]], %[[varg2]] : vector<16xi8>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[r]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.xor %[[arg1]], %[[arg2]] : vector<16xi8>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %{{[0-9]+}} = xor <16 x i8> %[[arg1]], %[[arg2]]
-end subroutine vec_xor_test_u8
-
-! CHECK-LABEL: vec_xor_test_u16
-subroutine vec_xor_test_u16(arg1, arg2)
-  vector(unsigned(2)) :: r, arg1, arg2
-  r = vec_xor(arg1, arg2)
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[r:.*]] = arith.xori %[[varg1]], %[[varg2]] : vector<8xi16>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[r]] : (vector<8xi16>) -> !fir.vector<8:ui16>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.xor %[[arg1]], %[[arg2]] : vector<8xi16>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %{{[0-9]+}} = xor <8 x i16> %[[arg1]], %[[arg2]]
-end subroutine vec_xor_test_u16
-
-! CHECK-LABEL: vec_xor_test_u32
-subroutine vec_xor_test_u32(arg1, arg2)
-  vector(unsigned(4)) :: r, arg1, arg2
-  r = vec_xor(arg1, arg2)
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[r:.*]] = arith.xori %[[varg1]], %[[varg2]] : vector<4xi32>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[r]] : (vector<4xi32>) -> !fir.vector<4:ui32>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.xor %[[arg1]], %[[arg2]] : vector<4xi32>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %{{[0-9]+}} = xor <4 x i32> %[[arg1]], %[[arg2]]
-end subroutine vec_xor_test_u32
-
-! CHECK-LABEL: vec_xor_test_u64
-subroutine vec_xor_test_u64(arg1, arg2)
-  vector(unsigned(8)) :: r, arg1, arg2
-  r = vec_xor(arg1, arg2)
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %[[r:.*]] = arith.xori %[[varg1]], %[[varg2]] : vector<2xi64>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[r]] : (vector<2xi64>) -> !fir.vector<2:ui64>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.xor %[[arg1]], %[[arg2]] : vector<2xi64>
-
-! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %{{[0-9]+}} = xor <2 x i64> %[[arg1]], %[[arg2]]
-end subroutine vec_xor_test_u64
-
-! CHECK-LABEL: vec_xor_testf32
-subroutine vec_xor_testf32(arg1, arg2)
-  vector(real(4)) :: r, arg1, arg2
-  r = vec_xor(arg1, arg2)
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[bc1:.*]] = vector.bitcast %[[varg1]] : vector<4xf32> to vector<4xi32>
-! CHECK-FIR: %[[bc2:.*]] = vector.bitcast %[[varg2]] : vector<4xf32> to vector<4xi32>
-! CHECK-FIR: %[[r:.*]] = arith.xori %[[bc1]], %[[bc2]] : vector<4xi32>
-! CHECK-FIR: %[[vr:.*]] = vector.bitcast %[[r]] : vector<4xi32> to vector<4xf32>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[vr]] : (vector<4xf32>) -> !fir.vector<4:f32>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[bc1:.*]] = llvm.bitcast %[[arg1]] : vector<4xf32> to vector<4xi32>
-! CHECK-LLVMIR: %[[bc2:.*]] = llvm.bitcast %[[arg2]] : vector<4xf32> to vector<4xi32>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.xor %[[bc1]], %[[bc2]]  : vector<4xi32>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[r]] : vector<4xi32> to vector<4xf32>
-
-! CHECK: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! CHECK: %[[bc1:.*]] = bitcast <4 x float> %[[arg1]] to <4 x i32>
-! CHECK: %[[bc2:.*]] = bitcast <4 x float> %[[arg2]] to <4 x i32>
-! CHECK: %[[r:.*]] = xor <4 x i32> %[[bc1]], %[[bc2]]
-! CHECK: %{{[0-9]+}} = bitcast <4 x i32> %[[r]] to <4 x float>
-end subroutine vec_xor_testf32
-
-! CHECK-LABEL: vec_xor_testf64
-subroutine vec_xor_testf64(arg1, arg2)
-  vector(real(8)) :: r, arg1, arg2
-  r = vec_xor(arg1, arg2)
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! CHECK-FIR: %[[bc1:.*]] = vector.bitcast %[[varg1]] : vector<2xf64> to vector<2xi64>
-! CHECK-FIR: %[[bc2:.*]] = vector.bitcast %[[varg2]] : vector<2xf64> to vector<2xi64>
-! CHECK-FIR: %[[r:.*]] = arith.xori %[[bc1]], %[[bc2]] : vector<2xi64>
-! CHECK-FIR: %[[vr:.*]] = vector.bitcast %[[r]] : vector<2xi64> to vector<2xf64>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[vr]] : (vector<2xf64>) -> !fir.vector<2:f64>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %[[bc1:.*]] = llvm.bitcast %[[arg1]] : vector<2xf64> to vector<2xi64>
-! CHECK-LLVMIR: %[[bc2:.*]] = llvm.bitcast %[[arg2]] : vector<2xf64> to vector<2xi64>
-! CHECK-LLVMIR: %[[r:.*]] = llvm.xor %[[bc1]], %[[bc2]]  : vector<2xi64>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[r]] : vector<2xi64> to vector<2xf64>
-
-! CHECK: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16
-! CHECK: %[[bc1:.*]] = bitcast <2 x double> %[[arg1]] to <2 x i64>
-! CHECK: %[[bc2:.*]] = bitcast <2 x double> %[[arg2]] to <2 x i64>
-! CHECK: %[[r:.*]] = xor <2 x i64> %[[bc1]], %[[bc2]]
-! CHECK: %{{[0-9]+}} = bitcast <2 x i64> %[[r]] to <2 x double>
-end subroutine vec_xor_testf64
-

diff --git a/flang/test/Lower/PowerPC/ppc-vec_any.f90 b/flang/test/Lower/PowerPC/ppc-vec_any.f90
deleted file mode 100644
index 8ed40ec0d952ec1..000000000000000
--- a/flang/test/Lower/PowerPC/ppc-vec_any.f90
+++ /dev/null
@@ -1,219 +0,0 @@
-! RUN: bbc -emit-fir %s -o - | FileCheck --check-prefixes="CHECK-FIR" %s
-! RUN: %flang_fc1 -emit-fir %s -o - | fir-opt --fir-to-llvm-ir | FileCheck --check-prefixes="CHECK-LLVMIR" %s
-! RUN: %flang_fc1 -emit-llvm %s -o - | FileCheck --check-prefixes="CHECK" %s
-! REQUIRES: target=powerpc{{.*}}
-
-!----------------------
-! vec_any_ge
-!----------------------
-
-! CHECK-LABEL: vec_any_ge_test_i1
-subroutine vec_any_ge_test_i1(arg1, arg2)
-  vector(integer(1)), intent(in) :: arg1, arg2
-  integer(4) :: r
-  r = vec_any_ge(arg1, arg2)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[op:.*]] = arith.constant 3 : i32
-! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vcmpgtsb.p(%[[op]], %[[arg2]], %[[arg1]]) fastmath<contract> : (i32, !fir.vector<16:i8>, !fir.vector<16:i8>) -> i32
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[op:.*]] = llvm.mlir.constant(3 : i32) : i32
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.call @llvm.ppc.altivec.vcmpgtsb.p(%[[op]], %[[arg2]], %[[arg1]]) {fastmathFlags = #llvm.fastmath<contract>} : (i32, vector<16xi8>, vector<16xi8>) -> i32
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %{{[0-9]+}} = call i32 @llvm.ppc.altivec.vcmpgtsb.p(i32 3, <16 x i8> %[[arg2]], <16 x i8> %[[arg1]])
-end subroutine vec_any_ge_test_i1
-
-! CHECK-LABEL: vec_any_ge_test_i2
-subroutine vec_any_ge_test_i2(arg1, arg2)
-  vector(integer(2)), intent(in) :: arg1, arg2
-  integer(4) :: r
-  r = vec_any_ge(arg1, arg2)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[op:.*]] = arith.constant 3 : i32
-! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vcmpgtsh.p(%[[op]], %[[arg2]], %[[arg1]]) fastmath<contract> : (i32, !fir.vector<8:i16>, !fir.vector<8:i16>) -> i32
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[op:.*]] = llvm.mlir.constant(3 : i32) : i32
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.call @llvm.ppc.altivec.vcmpgtsh.p(%[[op]], %[[arg2]], %[[arg1]]) {fastmathFlags = #llvm.fastmath<contract>} : (i32, vector<8xi16>, vector<8xi16>) -> i32
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %{{[0-9]+}} = call i32 @llvm.ppc.altivec.vcmpgtsh.p(i32 3, <8 x i16> %[[arg2]], <8 x i16> %[[arg1]])
-end subroutine vec_any_ge_test_i2
-
-! CHECK-LABEL: vec_any_ge_test_i4
-subroutine vec_any_ge_test_i4(arg1, arg2)
-  vector(integer(4)), intent(in) :: arg1, arg2
-  integer(4) :: r
-  r = vec_any_ge(arg1, arg2)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[op:.*]] = arith.constant 3 : i32
-! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vcmpgtsw.p(%[[op]], %[[arg2]], %[[arg1]]) fastmath<contract> : (i32, !fir.vector<4:i32>, !fir.vector<4:i32>) -> i32
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[op:.*]] = llvm.mlir.constant(3 : i32) : i32
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.call @llvm.ppc.altivec.vcmpgtsw.p(%[[op]], %[[arg2]], %[[arg1]]) {fastmathFlags = #llvm.fastmath<contract>} : (i32, vector<4xi32>, vector<4xi32>) -> i32
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %{{[0-9]+}} = call i32 @llvm.ppc.altivec.vcmpgtsw.p(i32 3, <4 x i32> %[[arg2]], <4 x i32> %[[arg1]])
-end subroutine vec_any_ge_test_i4
-
-! CHECK-LABEL: vec_any_ge_test_i8
-subroutine vec_any_ge_test_i8(arg1, arg2)
-  vector(integer(8)), intent(in) :: arg1, arg2
-  integer(4) :: r
-  r = vec_any_ge(arg1, arg2)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[op:.*]] = arith.constant 3 : i32
-! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vcmpgtsd.p(%[[op]], %[[arg2]], %[[arg1]]) fastmath<contract> : (i32, !fir.vector<2:i64>, !fir.vector<2:i64>) -> i32
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[op:.*]] = llvm.mlir.constant(3 : i32) : i32
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.call @llvm.ppc.altivec.vcmpgtsd.p(%[[op]], %[[arg2]], %[[arg1]]) {fastmathFlags = #llvm.fastmath<contract>} : (i32, vector<2xi64>, vector<2xi64>) -> i32
-
-! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %{{[0-9]+}} = call i32 @llvm.ppc.altivec.vcmpgtsd.p(i32 3, <2 x i64> %[[arg2]], <2 x i64> %[[arg1]])
-end subroutine vec_any_ge_test_i8
-
-! CHECK-LABEL: vec_any_ge_test_u1
-subroutine vec_any_ge_test_u1(arg1, arg2)
-  vector(unsigned(1)), intent(in) :: arg1, arg2
-  integer(4) :: r
-  r = vec_any_ge(arg1, arg2)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[op:.*]] = arith.constant 3 : i32
-! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vcmpgtub.p(%[[op]], %[[arg2]], %[[arg1]]) fastmath<contract> : (i32, !fir.vector<16:ui8>, !fir.vector<16:ui8>) -> i32
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[op:.*]] = llvm.mlir.constant(3 : i32) : i32
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.call @llvm.ppc.altivec.vcmpgtub.p(%[[op]], %[[arg2]], %[[arg1]]) {fastmathFlags = #llvm.fastmath<contract>} : (i32, vector<16xi8>, vector<16xi8>) -> i32
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %{{[0-9]+}} = call i32 @llvm.ppc.altivec.vcmpgtub.p(i32 3, <16 x i8> %[[arg2]], <16 x i8> %[[arg1]])
-end subroutine vec_any_ge_test_u1
-
-! CHECK-LABEL: vec_any_ge_test_u2
-subroutine vec_any_ge_test_u2(arg1, arg2)
-  vector(unsigned(2)), intent(in) :: arg1, arg2
-  integer(4) :: r
-  r = vec_any_ge(arg1, arg2)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[op:.*]] = arith.constant 3 : i32
-! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vcmpgtuh.p(%[[op]], %[[arg2]], %[[arg1]]) fastmath<contract> : (i32, !fir.vector<8:ui16>, !fir.vector<8:ui16>) -> i32
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[op:.*]] = llvm.mlir.constant(3 : i32) : i32
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.call @llvm.ppc.altivec.vcmpgtuh.p(%[[op]], %[[arg2]], %[[arg1]]) {fastmathFlags = #llvm.fastmath<contract>} : (i32, vector<8xi16>, vector<8xi16>) -> i32
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %{{[0-9]+}} = call i32 @llvm.ppc.altivec.vcmpgtuh.p(i32 3, <8 x i16> %[[arg2]], <8 x i16> %[[arg1]])
-end subroutine vec_any_ge_test_u2
-
-! CHECK-LABEL: vec_any_ge_test_u4
-subroutine vec_any_ge_test_u4(arg1, arg2)
-  vector(unsigned(4)), intent(in) :: arg1, arg2
-  integer(4) :: r
-  r = vec_any_ge(arg1, arg2)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[op:.*]] = arith.constant 3 : i32
-! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vcmpgtuw.p(%[[op]], %[[arg2]], %[[arg1]]) fastmath<contract> : (i32, !fir.vector<4:ui32>, !fir.vector<4:ui32>) -> i32
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[op:.*]] = llvm.mlir.constant(3 : i32) : i32
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.call @llvm.ppc.altivec.vcmpgtuw.p(%[[op]], %[[arg2]], %[[arg1]]) {fastmathFlags = #llvm.fastmath<contract>} : (i32, vector<4xi32>, vector<4xi32>) -> i32
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %{{[0-9]+}} = call i32 @llvm.ppc.altivec.vcmpgtuw.p(i32 3, <4 x i32> %[[arg2]], <4 x i32> %[[arg1]])
-end subroutine vec_any_ge_test_u4
-
-! CHECK-LABEL: vec_any_ge_test_u8
-subroutine vec_any_ge_test_u8(arg1, arg2)
-  vector(unsigned(8)), intent(in) :: arg1, arg2
-  integer(4) :: r
-  r = vec_any_ge(arg1, arg2)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[op:.*]] = arith.constant 3 : i32
-! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vcmpgtud.p(%[[op]], %[[arg2]], %[[arg1]]) fastmath<contract> : (i32, !fir.vector<2:ui64>, !fir.vector<2:ui64>) -> i32
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[op:.*]] = llvm.mlir.constant(3 : i32) : i32
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.call @llvm.ppc.altivec.vcmpgtud.p(%[[op]], %[[arg2]], %[[arg1]]) {fastmathFlags = #llvm.fastmath<contract>} : (i32, vector<2xi64>, vector<2xi64>) -> i32
-
-! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %{{[0-9]+}} = call i32 @llvm.ppc.altivec.vcmpgtud.p(i32 3, <2 x i64> %[[arg2]], <2 x i64> %[[arg1]])
-end subroutine vec_any_ge_test_u8
-
-! CHECK-LABEL: vec_any_ge_test_r4
-subroutine vec_any_ge_test_r4(arg1, arg2)
-  vector(real(4)), intent(in) :: arg1, arg2
-  integer(4) :: r
-  r = vec_any_ge(arg1, arg2)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[op:.*]] = arith.constant 1 : i32
-! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.vsx.xvcmpgesp.p(%[[op]], %[[arg1]], %[[arg2]]) fastmath<contract> : (i32, !fir.vector<4:f32>, !fir.vector<4:f32>) -> i32
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[op:.*]] = llvm.mlir.constant(1 : i32) : i32
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.call @llvm.ppc.vsx.xvcmpgesp.p(%[[op]], %[[arg1]], %[[arg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (i32, vector<4xf32>, vector<4xf32>) -> i32
-
-! CHECK: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! CHECK: %{{[0-9]+}} = call i32 @llvm.ppc.vsx.xvcmpgesp.p(i32 1, <4 x float> %[[arg1]], <4 x float> %[[arg2]])
-end subroutine vec_any_ge_test_r4
-
-! CHECK-LABEL: vec_any_ge_test_r8
-subroutine vec_any_ge_test_r8(arg1, arg2)
-  vector(real(8)), intent(in) :: arg1, arg2
-  integer(4) :: r
-  r = vec_any_ge(arg1, arg2)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[op:.*]] = arith.constant 1 : i32
-! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.vsx.xvcmpgedp.p(%[[op]], %[[arg1]], %[[arg2]]) fastmath<contract> : (i32, !fir.vector<2:f64>, !fir.vector<2:f64>) -> i32
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %[[op:.*]] = llvm.mlir.constant(1 : i32) : i32
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.call @llvm.ppc.vsx.xvcmpgedp.p(%[[op]], %[[arg1]], %[[arg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (i32, vector<2xf64>, vector<2xf64>) -> i32
-
-! CHECK: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16
-! CHECK: %{{[0-9]+}} = call i32 @llvm.ppc.vsx.xvcmpgedp.p(i32 1, <2 x double> %[[arg1]], <2 x double> %[[arg2]])
-end subroutine vec_any_ge_test_r8
-

diff --git a/flang/test/Lower/PowerPC/ppc-vec_cmp.f90 b/flang/test/Lower/PowerPC/ppc-vec_cmp.f90
deleted file mode 100644
index bf6acbd557e8e1b..000000000000000
--- a/flang/test/Lower/PowerPC/ppc-vec_cmp.f90
+++ /dev/null
@@ -1,827 +0,0 @@
-! RUN: bbc -emit-fir %s -o - | FileCheck --check-prefixes="CHECK-FIR" %s
-! RUN: %flang_fc1 -emit-fir %s -o - | fir-opt --fir-to-llvm-ir | FileCheck --check-prefixes="CHECK-LLVMIR" %s
-! RUN: %flang_fc1 -emit-llvm %s -o - | FileCheck --check-prefixes="CHECK" %s
-! REQUIRES: target=powerpc{{.*}}
-
-!----------------------
-! vec_cmpge
-!----------------------
-
-! CHECK-LABEL: vec_cmpge_test_i8
-subroutine vec_cmpge_test_i8(arg1, arg2)
-  vector(integer(8)) :: arg1, arg2
-  vector(unsigned(8)) :: r
-  r = vec_cmpge(arg1, arg2)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i64
-! CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i64 to vector<2xi64>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vcmpgtsd(%[[arg2]], %[[arg1]]) fastmath<contract> : (!fir.vector<2:i64>, !fir.vector<2:i64>) -> !fir.vector<2:ui64>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %[[xorres:.*]] = arith.xori %[[vres]], %[[vc]] : vector<2xi64>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[xorres]] : (vector<2xi64>) -> !fir.vector<2:ui64>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(-1 : i64) : i64
-! CHECK-LLVMIR: %[[vc:.*]] = llvm.mlir.constant(dense<-1> : vector<2xi64>) : vector<2xi64>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vcmpgtsd(%[[arg2]], %[[arg1]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<2xi64>, vector<2xi64>) -> vector<2xi64>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.xor %[[res]], %[[vc]]  : vector<2xi64>
-
-! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[res:.*]] = call <2 x i64> @llvm.ppc.altivec.vcmpgtsd(<2 x i64> %[[arg2]], <2 x i64> %[[arg1]])
-! CHECK: %{{[0-9]+}} = xor <2 x i64> %[[res]], <i64 -1, i64 -1>
-end subroutine vec_cmpge_test_i8
-
-! CHECK-LABEL: vec_cmpge_test_i4
-subroutine vec_cmpge_test_i4(arg1, arg2)
-  vector(integer(4)) :: arg1, arg2
-  vector(unsigned(4)) :: r
-  r = vec_cmpge(arg1, arg2)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i32
-! CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i32 to vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vcmpgtsw(%[[arg2]], %[[arg1]]) fastmath<contract> : (!fir.vector<4:i32>, !fir.vector<4:i32>) -> !fir.vector<4:ui32>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[xorres:.*]] = arith.xori %[[vres]], %[[vc]] : vector<4xi32>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[xorres]] : (vector<4xi32>) -> !fir.vector<4:ui32>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(-1 : i32) : i32
-! CHECK-LLVMIR: %[[vc:.*]] = llvm.mlir.constant(dense<-1> : vector<4xi32>) : vector<4xi32>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vcmpgtsw(%[[arg2]], %[[arg1]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.xor %[[res]], %[[vc]]  : vector<4xi32>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vcmpgtsw(<4 x i32> %[[arg2]], <4 x i32> %[[arg1]])
-! CHECK: %{{[0-9]+}} = xor <4 x i32> %[[res]], <i32 -1, i32 -1, i32 -1, i32 -1>
-end subroutine vec_cmpge_test_i4
-
-! CHECK-LABEL: vec_cmpge_test_i2
-subroutine vec_cmpge_test_i2(arg1, arg2)
-  vector(integer(2)) :: arg1, arg2
-  vector(unsigned(2)) :: r
-  r = vec_cmpge(arg1, arg2)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i16
-! CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i16 to vector<8xi16>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vcmpgtsh(%[[arg2]], %[[arg1]]) fastmath<contract> : (!fir.vector<8:i16>, !fir.vector<8:i16>) -> !fir.vector<8:ui16>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[xorres:.*]] = arith.xori %[[vres]], %[[vc]] : vector<8xi16>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[xorres]] : (vector<8xi16>) -> !fir.vector<8:ui16>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(-1 : i16) : i16
-! CHECK-LLVMIR: %[[vc:.*]] = llvm.mlir.constant(dense<-1> : vector<8xi16>) : vector<8xi16>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vcmpgtsh(%[[arg2]], %[[arg1]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<8xi16>, vector<8xi16>) -> vector<8xi16>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.xor %[[res]], %[[vc]]  : vector<8xi16>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[res:.*]] = call <8 x i16> @llvm.ppc.altivec.vcmpgtsh(<8 x i16> %[[arg2]], <8 x i16> %[[arg1]])
-! CHECK: %{{[0-9]+}} = xor <8 x i16> %[[res]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
-end subroutine vec_cmpge_test_i2
-
-! CHECK-LABEL: vec_cmpge_test_i1
-subroutine vec_cmpge_test_i1(arg1, arg2)
-  vector(integer(1)) :: arg1, arg2
-  vector(unsigned(1)) :: r
-  r = vec_cmpge(arg1, arg2)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i8
-! CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i8 to vector<16xi8>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vcmpgtsb(%[[arg2]], %[[arg1]]) fastmath<contract> : (!fir.vector<16:i8>, !fir.vector<16:i8>) -> !fir.vector<16:ui8>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[xorres:.*]] = arith.xori %[[vres]], %[[vc]] : vector<16xi8>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[xorres]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(-1 : i8) : i8
-! CHECK-LLVMIR: %[[vc:.*]] = llvm.mlir.constant(dense<-1> : vector<16xi8>) : vector<16xi8>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vcmpgtsb(%[[arg2]], %[[arg1]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<16xi8>, vector<16xi8>) -> vector<16xi8>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.xor %[[res]], %[[vc]]  : vector<16xi8>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[res:.*]] = call <16 x i8> @llvm.ppc.altivec.vcmpgtsb(<16 x i8> %[[arg2]], <16 x i8> %[[arg1]])
-! CHECK: %{{[0-9]+}} = xor <16 x i8> %[[res]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
-end subroutine vec_cmpge_test_i1
-
-! CHECK-LABEL: vec_cmpge_test_u8
-subroutine vec_cmpge_test_u8(arg1, arg2)
-  vector(unsigned(8)) :: arg1, arg2
-  vector(unsigned(8)) :: r
-  r = vec_cmpge(arg1, arg2)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i64
-! CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i64 to vector<2xi64>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vcmpgtud(%[[arg2]], %[[arg1]]) fastmath<contract> : (!fir.vector<2:ui64>, !fir.vector<2:ui64>) -> !fir.vector<2:ui64>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %[[xorres:.*]] = arith.xori %[[vres]], %[[vc]] : vector<2xi64>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[xorres]] : (vector<2xi64>) -> !fir.vector<2:ui64>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(-1 : i64) : i64
-! CHECK-LLVMIR: %[[vc:.*]] = llvm.mlir.constant(dense<-1> : vector<2xi64>) : vector<2xi64>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vcmpgtud(%[[arg2]], %[[arg1]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<2xi64>, vector<2xi64>) -> vector<2xi64>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.xor %[[res]], %[[vc]]  : vector<2xi64>
-
-! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[res:.*]] = call <2 x i64> @llvm.ppc.altivec.vcmpgtud(<2 x i64> %[[arg2]], <2 x i64> %[[arg1]])
-! CHECK: %{{[0-9]+}} = xor <2 x i64> %[[res]], <i64 -1, i64 -1>
-end subroutine vec_cmpge_test_u8
-
-! CHECK-LABEL: vec_cmpge_test_u4
-subroutine vec_cmpge_test_u4(arg1, arg2)
-  vector(unsigned(4)) :: arg1, arg2
-  vector(unsigned(4)) :: r
-  r = vec_cmpge(arg1, arg2)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i32
-! CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i32 to vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vcmpgtuw(%[[arg2]], %[[arg1]]) fastmath<contract> : (!fir.vector<4:ui32>, !fir.vector<4:ui32>) -> !fir.vector<4:ui32>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[xorres:.*]] = arith.xori %[[vres]], %[[vc]] : vector<4xi32>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[xorres]] : (vector<4xi32>) -> !fir.vector<4:ui32>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(-1 : i32) : i32
-! CHECK-LLVMIR: %[[vc:.*]] = llvm.mlir.constant(dense<-1> : vector<4xi32>) : vector<4xi32>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vcmpgtuw(%[[arg2]], %[[arg1]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.xor %[[res]], %[[vc]]  : vector<4xi32>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vcmpgtuw(<4 x i32> %[[arg2]], <4 x i32> %[[arg1]])
-! CHECK: %{{[0-9]+}} = xor <4 x i32> %[[res]], <i32 -1, i32 -1, i32 -1, i32 -1>
-end subroutine vec_cmpge_test_u4
-
-! CHECK-LABEL: vec_cmpge_test_u2
-subroutine vec_cmpge_test_u2(arg1, arg2)
-  vector(unsigned(2)) :: arg1, arg2
-  vector(unsigned(2)) :: r
-  r = vec_cmpge(arg1, arg2)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i16
-! CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i16 to vector<8xi16>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vcmpgtuh(%[[arg2]], %[[arg1]]) fastmath<contract> : (!fir.vector<8:ui16>, !fir.vector<8:ui16>) -> !fir.vector<8:ui16>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[xorres:.*]] = arith.xori %[[vres]], %[[vc]] : vector<8xi16>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[xorres]] : (vector<8xi16>) -> !fir.vector<8:ui16>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(-1 : i16) : i16
-! CHECK-LLVMIR: %[[vc:.*]] = llvm.mlir.constant(dense<-1> : vector<8xi16>) : vector<8xi16>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vcmpgtuh(%[[arg2]], %[[arg1]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<8xi16>, vector<8xi16>) -> vector<8xi16>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.xor %[[res]], %[[vc]]  : vector<8xi16>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[res:.*]] = call <8 x i16> @llvm.ppc.altivec.vcmpgtuh(<8 x i16> %[[arg2]], <8 x i16> %[[arg1]])
-! CHECK: %{{[0-9]+}} = xor <8 x i16> %[[res]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
-end subroutine vec_cmpge_test_u2
-
-! CHECK-LABEL: vec_cmpge_test_u1
-subroutine vec_cmpge_test_u1(arg1, arg2)
-  vector(unsigned(1)) :: arg1, arg2
-  vector(unsigned(1)) :: r
-  r = vec_cmpge(arg1, arg2)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i8
-! CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i8 to vector<16xi8>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vcmpgtub(%[[arg2]], %[[arg1]]) fastmath<contract> : (!fir.vector<16:ui8>, !fir.vector<16:ui8>) -> !fir.vector<16:ui8>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[xorres:.*]] = arith.xori %[[vres]], %[[vc]] : vector<16xi8>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[xorres]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(-1 : i8) : i8
-! CHECK-LLVMIR: %[[vc:.*]] = llvm.mlir.constant(dense<-1> : vector<16xi8>) : vector<16xi8>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vcmpgtub(%[[arg2]], %[[arg1]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<16xi8>, vector<16xi8>) -> vector<16xi8>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.xor %[[res]], %[[vc]]  : vector<16xi8>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[res:.*]] = call <16 x i8> @llvm.ppc.altivec.vcmpgtub(<16 x i8> %[[arg2]], <16 x i8> %[[arg1]])
-! CHECK: %{{[0-9]+}} = xor <16 x i8> %[[res]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
-end subroutine vec_cmpge_test_u1
-
-subroutine vec_cmpge_test_r4(arg1, arg2)
-  vector(real(4)) :: arg1, arg2
-  vector(unsigned(4)) :: r
-  r = vec_cmpge(arg1, arg2)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.vsx.xvcmpgesp(%[[arg1]], %[[arg2]]) fastmath<contract> : (!fir.vector<4:f32>, !fir.vector<4:f32>) -> !fir.vector<4:ui32>
-
-! CHECK: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! CHECK: %{{[0-9]+}} = call <4 x i32> @llvm.ppc.vsx.xvcmpgesp(<4 x float> %[[arg1]], <4 x float> %[[arg2]])
-end subroutine vec_cmpge_test_r4
-
-subroutine vec_cmpge_test_r8(arg1, arg2)
-  vector(real(8)) :: arg1, arg2
-  vector(unsigned(8)) :: r
-  r = vec_cmpge(arg1, arg2)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.vsx.xvcmpgedp(%[[arg1]], %[[arg2]]) fastmath<contract> : (!fir.vector<2:f64>, !fir.vector<2:f64>) -> !fir.vector<2:ui64>
-
-! CHECK: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16
-! CHECK: %{{[0-9]+}} = call <2 x i64> @llvm.ppc.vsx.xvcmpgedp(<2 x double> %[[arg1]], <2 x double> %[[arg2]])
-end subroutine vec_cmpge_test_r8
-
-!----------------------
-! vec_cmpgt
-!----------------------
-
-! CHECK-LABEL: vec_cmpgt_test_i1
-subroutine vec_cmpgt_test_i1(arg1, arg2)
-  vector(integer(1)) :: arg1, arg2
-  vector(unsigned(1)) :: r
-  r = vec_cmpgt(arg1, arg2)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vcmpgtsb(%[[arg1]], %[[arg2]]) fastmath<contract> : (!fir.vector<16:i8>, !fir.vector<16:i8>) -> !fir.vector<16:ui8>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %{{[0-9]+}} = call <16 x i8> @llvm.ppc.altivec.vcmpgtsb(<16 x i8> %[[arg1]], <16 x i8> %[[arg2]])
-end subroutine vec_cmpgt_test_i1
-
-! CHECK-LABEL: vec_cmpgt_test_i2
-subroutine vec_cmpgt_test_i2(arg1, arg2)
-  vector(integer(2)) :: arg1, arg2
-  vector(unsigned(2)) :: r
-  r = vec_cmpgt(arg1, arg2)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vcmpgtsh(%[[arg1]], %[[arg2]]) fastmath<contract> : (!fir.vector<8:i16>, !fir.vector<8:i16>) -> !fir.vector<8:ui16>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %{{[0-9]+}} = call <8 x i16> @llvm.ppc.altivec.vcmpgtsh(<8 x i16> %[[arg1]], <8 x i16> %[[arg2]])
-end subroutine vec_cmpgt_test_i2
-
-! CHECK-LABEL: vec_cmpgt_test_i4
-subroutine vec_cmpgt_test_i4(arg1, arg2)
-  vector(integer(4)) :: arg1, arg2
-  vector(unsigned(4)) :: r
-  r = vec_cmpgt(arg1, arg2)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vcmpgtsw(%[[arg1]], %[[arg2]]) fastmath<contract> : (!fir.vector<4:i32>, !fir.vector<4:i32>) -> !fir.vector<4:ui32>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %{{[0-9]+}} = call <4 x i32> @llvm.ppc.altivec.vcmpgtsw(<4 x i32> %[[arg1]], <4 x i32> %[[arg2]])
-end subroutine vec_cmpgt_test_i4
-
-! CHECK-LABEL: vec_cmpgt_test_i8
-subroutine vec_cmpgt_test_i8(arg1, arg2)
-  vector(integer(8)) :: arg1, arg2
-  vector(unsigned(8)) :: r
-  r = vec_cmpgt(arg1, arg2)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vcmpgtsd(%[[arg1]], %[[arg2]]) fastmath<contract> : (!fir.vector<2:i64>, !fir.vector<2:i64>) -> !fir.vector<2:ui64>
-
-! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %{{[0-9]+}} = call <2 x i64> @llvm.ppc.altivec.vcmpgtsd(<2 x i64> %[[arg1]], <2 x i64> %[[arg2]])
-end subroutine vec_cmpgt_test_i8
-
-! CHECK-LABEL: vec_cmpgt_test_u1
-subroutine vec_cmpgt_test_u1(arg1, arg2)
-  vector(unsigned(1)) :: arg1, arg2
-  vector(unsigned(1)) :: r
-  r = vec_cmpgt(arg1, arg2)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vcmpgtub(%[[arg1]], %[[arg2]]) fastmath<contract> : (!fir.vector<16:ui8>, !fir.vector<16:ui8>) -> !fir.vector<16:ui8>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %{{[0-9]+}} = call <16 x i8> @llvm.ppc.altivec.vcmpgtub(<16 x i8> %[[arg1]], <16 x i8> %[[arg2]])
-end subroutine vec_cmpgt_test_u1
-
-! CHECK-LABEL: vec_cmpgt_test_u2
-subroutine vec_cmpgt_test_u2(arg1, arg2)
-  vector(unsigned(2)) :: arg1, arg2
-  vector(unsigned(2)) :: r
-  r = vec_cmpgt(arg1, arg2)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vcmpgtuh(%[[arg1]], %[[arg2]]) fastmath<contract> : (!fir.vector<8:ui16>, !fir.vector<8:ui16>) -> !fir.vector<8:ui16>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %{{[0-9]+}} = call <8 x i16> @llvm.ppc.altivec.vcmpgtuh(<8 x i16> %[[arg1]], <8 x i16> %[[arg2]])
-end subroutine vec_cmpgt_test_u2
-
-! CHECK-LABEL: vec_cmpgt_test_u4
-subroutine vec_cmpgt_test_u4(arg1, arg2)
-  vector(unsigned(4)) :: arg1, arg2
-  vector(unsigned(4)) :: r
-  r = vec_cmpgt(arg1, arg2)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vcmpgtuw(%[[arg1]], %[[arg2]]) fastmath<contract> : (!fir.vector<4:ui32>, !fir.vector<4:ui32>) -> !fir.vector<4:ui32>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %{{[0-9]+}} = call <4 x i32> @llvm.ppc.altivec.vcmpgtuw(<4 x i32> %[[arg1]], <4 x i32> %[[arg2]])
-end subroutine vec_cmpgt_test_u4
-
-! CHECK-LABEL: vec_cmpgt_test_u8
-subroutine vec_cmpgt_test_u8(arg1, arg2)
-  vector(unsigned(8)) :: arg1, arg2
-  vector(unsigned(8)) :: r
-  r = vec_cmpgt(arg1, arg2)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vcmpgtud(%[[arg1]], %[[arg2]]) fastmath<contract> : (!fir.vector<2:ui64>, !fir.vector<2:ui64>) -> !fir.vector<2:ui64>
-
-! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %{{[0-9]+}} = call <2 x i64> @llvm.ppc.altivec.vcmpgtud(<2 x i64> %[[arg1]], <2 x i64> %[[arg2]])
-end subroutine vec_cmpgt_test_u8
-
-! CHECK-LABEL: vec_cmpgt_test_r4
-subroutine vec_cmpgt_test_r4(arg1, arg2)
-  vector(real(4)) :: arg1, arg2
-  vector(unsigned(4)) :: r
-  r = vec_cmpgt(arg1, arg2)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.vsx.xvcmpgtsp(%[[arg1]], %[[arg2]]) fastmath<contract> : (!fir.vector<4:f32>, !fir.vector<4:f32>) -> !fir.vector<4:ui32>
-
-! CHECK: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! CHECK: %{{[0-9]+}} = call <4 x i32> @llvm.ppc.vsx.xvcmpgtsp(<4 x float> %[[arg1]], <4 x float> %[[arg2]])
-end subroutine vec_cmpgt_test_r4
-
-! CHECK-LABEL: vec_cmpgt_test_r8
-subroutine vec_cmpgt_test_r8(arg1, arg2)
-  vector(real(8)) :: arg1, arg2
-  vector(unsigned(8)) :: r
-  r = vec_cmpgt(arg1, arg2)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.vsx.xvcmpgtdp(%[[arg1]], %[[arg2]]) fastmath<contract> : (!fir.vector<2:f64>, !fir.vector<2:f64>) -> !fir.vector<2:ui64>
-
-! CHECK: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16
-! CHECK: %{{[0-9]+}} = call <2 x i64> @llvm.ppc.vsx.xvcmpgtdp(<2 x double> %[[arg1]], <2 x double> %[[arg2]])
-end subroutine vec_cmpgt_test_r8
-
-!----------------------
-! vec_cmple
-!----------------------
-
-! CHECK-LABEL: vec_cmple_test_i8
-subroutine vec_cmple_test_i8(arg1, arg2)
-  vector(integer(8)) :: arg1, arg2
-  vector(unsigned(8)) :: r
-  r = vec_cmple(arg1, arg2)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i64
-! CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i64 to vector<2xi64>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vcmpgtsd(%[[arg1]], %[[arg2]]) fastmath<contract> : (!fir.vector<2:i64>, !fir.vector<2:i64>) -> !fir.vector<2:ui64>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %[[xorres:.*]] = arith.xori %[[vres]], %[[vc]] : vector<2xi64>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[xorres]] : (vector<2xi64>) -> !fir.vector<2:ui64>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(-1 : i64) : i64
-! CHECK-LLVMIR: %[[vc:.*]] = llvm.mlir.constant(dense<-1> : vector<2xi64>) : vector<2xi64>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vcmpgtsd(%[[arg1]], %[[arg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<2xi64>, vector<2xi64>) -> vector<2xi64>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.xor %[[res]], %[[vc]]  : vector<2xi64>
-
-! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[res:.*]] = call <2 x i64> @llvm.ppc.altivec.vcmpgtsd(<2 x i64> %[[arg1]], <2 x i64> %[[arg2]])
-! CHECK: %{{[0-9]+}} = xor <2 x i64> %[[res]], <i64 -1, i64 -1>
-end subroutine vec_cmple_test_i8
-
-! CHECK-LABEL: vec_cmple_test_i4
-subroutine vec_cmple_test_i4(arg1, arg2)
-  vector(integer(4)) :: arg1, arg2
-  vector(unsigned(4)) :: r
-  r = vec_cmple(arg1, arg2)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i32
-! CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i32 to vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vcmpgtsw(%[[arg1]], %[[arg2]]) fastmath<contract> : (!fir.vector<4:i32>, !fir.vector<4:i32>) -> !fir.vector<4:ui32>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[xorres:.*]] = arith.xori %[[vres]], %[[vc]] : vector<4xi32>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[xorres]] : (vector<4xi32>) -> !fir.vector<4:ui32>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(-1 : i32) : i32
-! CHECK-LLVMIR: %[[vc:.*]] = llvm.mlir.constant(dense<-1> : vector<4xi32>) : vector<4xi32>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vcmpgtsw(%[[arg1]], %[[arg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.xor %[[res]], %[[vc]]  : vector<4xi32>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vcmpgtsw(<4 x i32> %[[arg1]], <4 x i32> %[[arg2]])
-! CHECK: %{{[0-9]+}} = xor <4 x i32> %[[res]], <i32 -1, i32 -1, i32 -1, i32 -1>
-end subroutine vec_cmple_test_i4
-
-! CHECK-LABEL: vec_cmple_test_i2
-subroutine vec_cmple_test_i2(arg1, arg2)
-  vector(integer(2)) :: arg1, arg2
-  vector(unsigned(2)) :: r
-  r = vec_cmple(arg1, arg2)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i16
-! CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i16 to vector<8xi16>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vcmpgtsh(%[[arg1]], %[[arg2]]) fastmath<contract> : (!fir.vector<8:i16>, !fir.vector<8:i16>) -> !fir.vector<8:ui16>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[xorres:.*]] = arith.xori %[[vres]], %[[vc]] : vector<8xi16>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[xorres]] : (vector<8xi16>) -> !fir.vector<8:ui16>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(-1 : i16) : i16
-! CHECK-LLVMIR: %[[vc:.*]] = llvm.mlir.constant(dense<-1> : vector<8xi16>) : vector<8xi16>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vcmpgtsh(%[[arg1]], %[[arg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<8xi16>, vector<8xi16>) -> vector<8xi16>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.xor %[[res]], %[[vc]]  : vector<8xi16>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[res:.*]] = call <8 x i16> @llvm.ppc.altivec.vcmpgtsh(<8 x i16> %[[arg1]], <8 x i16> %[[arg2]])
-! CHECK: %{{[0-9]+}} = xor <8 x i16> %[[res]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
-end subroutine vec_cmple_test_i2
-
-! CHECK-LABEL: vec_cmple_test_i1
-subroutine vec_cmple_test_i1(arg1, arg2)
-  vector(integer(1)) :: arg1, arg2
-  vector(unsigned(1)) :: r
-  r = vec_cmple(arg1, arg2)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i8
-! CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i8 to vector<16xi8>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vcmpgtsb(%[[arg1]], %[[arg2]]) fastmath<contract> : (!fir.vector<16:i8>, !fir.vector<16:i8>) -> !fir.vector<16:ui8>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[xorres:.*]] = arith.xori %[[vres]], %[[vc]] : vector<16xi8>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[xorres]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(-1 : i8) : i8
-! CHECK-LLVMIR: %[[vc:.*]] = llvm.mlir.constant(dense<-1> : vector<16xi8>) : vector<16xi8>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vcmpgtsb(%[[arg1]], %[[arg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<16xi8>, vector<16xi8>) -> vector<16xi8>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.xor %[[res]], %[[vc]]  : vector<16xi8>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[res:.*]] = call <16 x i8> @llvm.ppc.altivec.vcmpgtsb(<16 x i8> %[[arg1]], <16 x i8> %[[arg2]])
-! CHECK: %{{[0-9]+}} = xor <16 x i8> %[[res]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
-end subroutine vec_cmple_test_i1
-
-! CHECK-LABEL: vec_cmple_test_u8
-subroutine vec_cmple_test_u8(arg1, arg2)
-  vector(unsigned(8)) :: arg1, arg2
-  vector(unsigned(8)) :: r
-  r = vec_cmple(arg1, arg2)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i64
-! CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i64 to vector<2xi64>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vcmpgtud(%[[arg1]], %[[arg2]]) fastmath<contract> : (!fir.vector<2:ui64>, !fir.vector<2:ui64>) -> !fir.vector<2:ui64>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %[[xorres:.*]] = arith.xori %[[vres]], %[[vc]] : vector<2xi64>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[xorres]] : (vector<2xi64>) -> !fir.vector<2:ui64>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(-1 : i64) : i64
-! CHECK-LLVMIR: %[[vc:.*]] = llvm.mlir.constant(dense<-1> : vector<2xi64>) : vector<2xi64>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vcmpgtud(%[[arg1]], %[[arg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<2xi64>, vector<2xi64>) -> vector<2xi64>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.xor %[[res]], %[[vc]]  : vector<2xi64>
-
-! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[res:.*]] = call <2 x i64> @llvm.ppc.altivec.vcmpgtud(<2 x i64> %[[arg1]], <2 x i64> %[[arg2]])
-! CHECK: %{{[0-9]+}} = xor <2 x i64> %[[res]], <i64 -1, i64 -1>
-end subroutine vec_cmple_test_u8
-
-! CHECK-LABEL: vec_cmple_test_u4
-subroutine vec_cmple_test_u4(arg1, arg2)
-  vector(unsigned(4)) :: arg1, arg2
-  vector(unsigned(4)) :: r
-  r = vec_cmple(arg1, arg2)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i32
-! CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i32 to vector<4xi32>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vcmpgtuw(%[[arg1]], %[[arg2]]) fastmath<contract> : (!fir.vector<4:ui32>, !fir.vector<4:ui32>) -> !fir.vector<4:ui32>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[xorres:.*]] = arith.xori %[[vres]], %[[vc]] : vector<4xi32>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[xorres]] : (vector<4xi32>) -> !fir.vector<4:ui32>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(-1 : i32) : i32
-! CHECK-LLVMIR: %[[vc:.*]] = llvm.mlir.constant(dense<-1> : vector<4xi32>) : vector<4xi32>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vcmpgtuw(%[[arg1]], %[[arg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.xor %[[res]], %[[vc]]  : vector<4xi32>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[res:.*]] = call <4 x i32> @llvm.ppc.altivec.vcmpgtuw(<4 x i32> %[[arg1]], <4 x i32> %[[arg2]])
-! CHECK: %{{[0-9]+}} = xor <4 x i32> %[[res]], <i32 -1, i32 -1, i32 -1, i32 -1>
-end subroutine vec_cmple_test_u4
-
-! CHECK-LABEL: vec_cmple_test_u2
-subroutine vec_cmple_test_u2(arg1, arg2)
-  vector(unsigned(2)) :: arg1, arg2
-  vector(unsigned(2)) :: r
-  r = vec_cmple(arg1, arg2)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i16
-! CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i16 to vector<8xi16>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vcmpgtuh(%[[arg1]], %[[arg2]]) fastmath<contract> : (!fir.vector<8:ui16>, !fir.vector<8:ui16>) -> !fir.vector<8:ui16>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[xorres:.*]] = arith.xori %[[vres]], %[[vc]] : vector<8xi16>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[xorres]] : (vector<8xi16>) -> !fir.vector<8:ui16>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(-1 : i16) : i16
-! CHECK-LLVMIR: %[[vc:.*]] = llvm.mlir.constant(dense<-1> : vector<8xi16>) : vector<8xi16>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vcmpgtuh(%[[arg1]], %[[arg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<8xi16>, vector<8xi16>) -> vector<8xi16>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.xor %[[res]], %[[vc]]  : vector<8xi16>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[res:.*]] = call <8 x i16> @llvm.ppc.altivec.vcmpgtuh(<8 x i16> %[[arg1]], <8 x i16> %[[arg2]])
-! CHECK: %{{[0-9]+}} = xor <8 x i16> %[[res]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
-end subroutine vec_cmple_test_u2
-
-! CHECK-LABEL: vec_cmple_test_u1
-subroutine vec_cmple_test_u1(arg1, arg2)
-  vector(unsigned(1)) :: arg1, arg2
-  vector(unsigned(1)) :: r
-  r = vec_cmple(arg1, arg2)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i8
-! CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i8 to vector<16xi8>
-! CHECK-FIR: %[[res:.*]] = fir.call @llvm.ppc.altivec.vcmpgtub(%[[arg1]], %[[arg2]]) fastmath<contract> : (!fir.vector<16:ui8>, !fir.vector<16:ui8>) -> !fir.vector<16:ui8>
-! CHECK-FIR: %[[vres:.*]] = fir.convert %[[res]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[xorres:.*]] = arith.xori %[[vres]], %[[vc]] : vector<16xi8>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[xorres]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(-1 : i8) : i8
-! CHECK-LLVMIR: %[[vc:.*]] = llvm.mlir.constant(dense<-1> : vector<16xi8>) : vector<16xi8>
-! CHECK-LLVMIR: %[[res:.*]] = llvm.call @llvm.ppc.altivec.vcmpgtub(%[[arg1]], %[[arg2]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<16xi8>, vector<16xi8>) -> vector<16xi8>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.xor %[[res]], %[[vc]]  : vector<16xi8>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[res:.*]] = call <16 x i8> @llvm.ppc.altivec.vcmpgtub(<16 x i8> %[[arg1]], <16 x i8> %[[arg2]])
-! CHECK: %{{[0-9]+}} = xor <16 x i8> %[[res]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
-end subroutine vec_cmple_test_u1
-
-! CHECK-LABEL: vec_cmple_test_r4
-subroutine vec_cmple_test_r4(arg1, arg2)
-  vector(real(4)) :: arg1, arg2
-  vector(unsigned(4)) :: r
-  r = vec_cmple(arg1, arg2)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.vsx.xvcmpgesp(%[[arg2]], %[[arg1]]) fastmath<contract> : (!fir.vector<4:f32>, !fir.vector<4:f32>) -> !fir.vector<4:ui32>
-
-! CHECK: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! CHECK: %{{[0-9]+}} = call <4 x i32> @llvm.ppc.vsx.xvcmpgesp(<4 x float> %[[arg2]], <4 x float> %[[arg1]])
-end subroutine vec_cmple_test_r4
-
-! CHECK-LABEL: vec_cmple_test_r8
-subroutine vec_cmple_test_r8(arg1, arg2)
-  vector(real(8)) :: arg1, arg2
-  vector(unsigned(8)) :: r
-  r = vec_cmple(arg1, arg2)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.vsx.xvcmpgedp(%[[arg2]], %[[arg1]]) fastmath<contract> : (!fir.vector<2:f64>, !fir.vector<2:f64>) -> !fir.vector<2:ui64>
-
-! CHECK: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16
-! CHECK: %{{[0-9]+}} = call <2 x i64> @llvm.ppc.vsx.xvcmpgedp(<2 x double> %[[arg2]], <2 x double> %[[arg1]])
-end subroutine vec_cmple_test_r8
-
-!----------------------
-! vec_cmplt
-!----------------------
-
-! CHECK-LABEL: vec_cmplt_test_i1
-subroutine vec_cmplt_test_i1(arg1, arg2)
-  vector(integer(1)) :: arg1, arg2
-  vector(unsigned(1)) :: r
-  r = vec_cmplt(arg1, arg2)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vcmpgtsb(%[[arg2]], %[[arg1]]) fastmath<contract> : (!fir.vector<16:i8>, !fir.vector<16:i8>) -> !fir.vector<16:ui8>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %{{[0-9]+}} = call <16 x i8> @llvm.ppc.altivec.vcmpgtsb(<16 x i8> %[[arg2]], <16 x i8> %[[arg1]])
-end subroutine vec_cmplt_test_i1
-
-! CHECK-LABEL: vec_cmplt_test_i2
-subroutine vec_cmplt_test_i2(arg1, arg2)
-  vector(integer(2)) :: arg1, arg2
-  vector(unsigned(2)) :: r
-  r = vec_cmplt(arg1, arg2)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vcmpgtsh(%[[arg2]], %[[arg1]]) fastmath<contract> : (!fir.vector<8:i16>, !fir.vector<8:i16>) -> !fir.vector<8:ui16>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %{{[0-9]+}} = call <8 x i16> @llvm.ppc.altivec.vcmpgtsh(<8 x i16> %[[arg2]], <8 x i16> %[[arg1]])
-end subroutine vec_cmplt_test_i2
-
-! CHECK-LABEL: vec_cmplt_test_i4
-subroutine vec_cmplt_test_i4(arg1, arg2)
-  vector(integer(4)) :: arg1, arg2
-  vector(unsigned(4)) :: r
-  r = vec_cmplt(arg1, arg2)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vcmpgtsw(%[[arg2]], %[[arg1]]) fastmath<contract> : (!fir.vector<4:i32>, !fir.vector<4:i32>) -> !fir.vector<4:ui32>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %{{[0-9]+}} = call <4 x i32> @llvm.ppc.altivec.vcmpgtsw(<4 x i32> %[[arg2]], <4 x i32> %[[arg1]])
-end subroutine vec_cmplt_test_i4
-
-! CHECK-LABEL: vec_cmplt_test_i8
-subroutine vec_cmplt_test_i8(arg1, arg2)
-  vector(integer(8)) :: arg1, arg2
-  vector(unsigned(8)) :: r
-  r = vec_cmplt(arg1, arg2)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vcmpgtsd(%[[arg2]], %[[arg1]]) fastmath<contract> : (!fir.vector<2:i64>, !fir.vector<2:i64>) -> !fir.vector<2:ui64>
-
-! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %{{[0-9]+}} = call <2 x i64> @llvm.ppc.altivec.vcmpgtsd(<2 x i64> %[[arg2]], <2 x i64> %[[arg1]])
-end subroutine vec_cmplt_test_i8
-
-! CHECK-LABEL: vec_cmplt_test_u1
-subroutine vec_cmplt_test_u1(arg1, arg2)
-  vector(unsigned(1)) :: arg1, arg2
-  vector(unsigned(1)) :: r
-  r = vec_cmplt(arg1, arg2)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vcmpgtub(%[[arg2]], %[[arg1]]) fastmath<contract> : (!fir.vector<16:ui8>, !fir.vector<16:ui8>) -> !fir.vector<16:ui8>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %{{[0-9]+}} = call <16 x i8> @llvm.ppc.altivec.vcmpgtub(<16 x i8> %[[arg2]], <16 x i8> %[[arg1]])
-end subroutine vec_cmplt_test_u1
-
-! CHECK-LABEL: vec_cmplt_test_u2
-subroutine vec_cmplt_test_u2(arg1, arg2)
-  vector(unsigned(2)) :: arg1, arg2
-  vector(unsigned(2)) :: r
-  r = vec_cmplt(arg1, arg2)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vcmpgtuh(%[[arg2]], %[[arg1]]) fastmath<contract> : (!fir.vector<8:ui16>, !fir.vector<8:ui16>) -> !fir.vector<8:ui16>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %{{[0-9]+}} = call <8 x i16> @llvm.ppc.altivec.vcmpgtuh(<8 x i16> %[[arg2]], <8 x i16> %[[arg1]])
-end subroutine vec_cmplt_test_u2
-
-! CHECK-LABEL: vec_cmplt_test_u4
-subroutine vec_cmplt_test_u4(arg1, arg2)
-  vector(unsigned(4)) :: arg1, arg2
-  vector(unsigned(4)) :: r
-  r = vec_cmplt(arg1, arg2)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vcmpgtuw(%[[arg2]], %[[arg1]]) fastmath<contract> : (!fir.vector<4:ui32>, !fir.vector<4:ui32>) -> !fir.vector<4:ui32>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %{{[0-9]+}} = call <4 x i32> @llvm.ppc.altivec.vcmpgtuw(<4 x i32> %[[arg2]], <4 x i32> %[[arg1]])
-end subroutine vec_cmplt_test_u4
-
-! CHECK-LABEL: vec_cmplt_test_u8
-subroutine vec_cmplt_test_u8(arg1, arg2)
-  vector(unsigned(8)) :: arg1, arg2
-  vector(unsigned(8)) :: r
-  r = vec_cmplt(arg1, arg2)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.altivec.vcmpgtud(%[[arg2]], %[[arg1]]) fastmath<contract> : (!fir.vector<2:ui64>, !fir.vector<2:ui64>) -> !fir.vector<2:ui64>
-
-! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %{{[0-9]+}} = call <2 x i64> @llvm.ppc.altivec.vcmpgtud(<2 x i64> %[[arg2]], <2 x i64> %[[arg1]])
-end subroutine vec_cmplt_test_u8
-
-! CHECK-LABEL: vec_cmplt_test_r4
-subroutine vec_cmplt_test_r4(arg1, arg2)
-  vector(real(4)) :: arg1, arg2
-  vector(unsigned(4)) :: r
-  r = vec_cmplt(arg1, arg2)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.vsx.xvcmpgtsp(%[[arg2]], %[[arg1]]) fastmath<contract> : (!fir.vector<4:f32>, !fir.vector<4:f32>) -> !fir.vector<4:ui32>
-
-! CHECK: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! CHECK: %{{[0-9]+}} = call <4 x i32> @llvm.ppc.vsx.xvcmpgtsp(<4 x float> %[[arg2]], <4 x float> %[[arg1]])
-end subroutine vec_cmplt_test_r4
-
-! CHECK-LABEL: vec_cmplt_test_r8
-subroutine vec_cmplt_test_r8(arg1, arg2)
-  vector(real(8)) :: arg1, arg2
-  vector(unsigned(8)) :: r
-  r = vec_cmplt(arg1, arg2)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %{{[0-9]+}} = fir.call @llvm.ppc.vsx.xvcmpgtdp(%[[arg2]], %[[arg1]]) fastmath<contract> : (!fir.vector<2:f64>, !fir.vector<2:f64>) -> !fir.vector<2:ui64>
-
-! CHECK: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16
-! CHECK: %{{[0-9]+}} = call <2 x i64> @llvm.ppc.vsx.xvcmpgtdp(<2 x double> %[[arg2]], <2 x double> %[[arg1]])
-end subroutine vec_cmplt_test_r8
-

diff --git a/flang/test/Lower/PowerPC/ppc-vec_cvf-elem-order.f90 b/flang/test/Lower/PowerPC/ppc-vec_cvf-elem-order.f90
deleted file mode 100644
index 232578437641f6e..000000000000000
--- a/flang/test/Lower/PowerPC/ppc-vec_cvf-elem-order.f90
+++ /dev/null
@@ -1,37 +0,0 @@
-! RUN: %flang_fc1 -emit-fir %s -fno-ppc-native-vector-element-order -triple ppc64le-unknown-linux -o - | FileCheck --check-prefixes="FIR" %s
-! RUN: %flang_fc1 -emit-llvm %s -fno-ppc-native-vector-element-order -triple ppc64le-unknown-linux -o - | FileCheck --check-prefixes="LLVMIR" %s
-! REQUIRES: target=powerpc{{.*}}
-
-! CHECK-LABEL: vec_cvf_test_r4r8
-subroutine vec_cvf_test_r4r8(arg1)
-  vector(real(8)), intent(in) :: arg1
-  vector(real(4)) :: r
-  r = vec_cvf(arg1)
-
-! FIR: %[[arg:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! FIR: %[[carg:.*]] = fir.convert %[[arg]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! FIR: %[[call:.*]] = fir.call @llvm.ppc.vsx.xvcvdpsp(%[[carg]]) fastmath<contract> : (vector<2xf64>) -> !fir.vector<4:f32>
-! FIR: %[[ccall:.*]] = fir.convert %[[call]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! FIR: %[[r:.*]] = fir.convert %[[ccall]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-
-! LLVMIR: %[[arg:.*]] = load <2 x double>, ptr %{{.*}}, align 16
-! LLVMIR: %[[call:.*]] = call contract <4 x float> @llvm.ppc.vsx.xvcvdpsp(<2 x double> %[[arg]])
-! LLVMIR: store <4 x float> %[[call]], ptr %{{.*}}, align 16
-end subroutine vec_cvf_test_r4r8
-
-! CHECK-LABEL: vec_cvf_test_r8r4
-subroutine vec_cvf_test_r8r4(arg1)
-  vector(real(4)), intent(in) :: arg1
-  vector(real(8)) :: r
-  r = vec_cvf(arg1)
-
-! FIR: %[[arg:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! FIR: %[[carg:.*]] = fir.convert %[[arg]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! FIR: %[[call:.*]] = fir.call @llvm.ppc.vsx.xvcvspdp(%[[carg]]) fastmath<contract> : (vector<4xf32>) -> !fir.vector<2:f64>
-! FIR: fir.store %[[call]] to %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-
-! LLVMIR: %[[arg:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! LLVMIR: %[[r:.*]] = call contract <2 x double> @llvm.ppc.vsx.xvcvspdp(<4 x float> %[[arg]])
-! LLVMIR: store <2 x double> %[[r]], ptr %{{.*}}, align 16
-end subroutine vec_cvf_test_r8r4

diff --git a/flang/test/Lower/PowerPC/ppc-vec_max-min-madd-nmsub.f90 b/flang/test/Lower/PowerPC/ppc-vec_max-min-madd-nmsub.f90
deleted file mode 100644
index 1f95223daf26c51..000000000000000
--- a/flang/test/Lower/PowerPC/ppc-vec_max-min-madd-nmsub.f90
+++ /dev/null
@@ -1,628 +0,0 @@
-! RUN: bbc -emit-fir %s -o - | FileCheck --check-prefixes="CHECK-FIR" %s
-! RUN: %flang_fc1 -emit-fir %s -o - | fir-opt --fir-to-llvm-ir | FileCheck --check-prefixes="CHECK-LLVMIR" %s
-! RUN: %flang_fc1 -emit-llvm %s -o - | FileCheck --check-prefixes="CHECK" %s
-! REQUIRES: target=powerpc{{.*}}
-
-! vec_max
-
-! CHECK-LABEL: vec_max_testf32
-subroutine vec_max_testf32(x, y)
-  vector(real(4)) :: vmax, x, y
-  vmax = vec_max(x, y)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[vmax:.*]] = fir.call @llvm.ppc.vsx.xvmaxsp(%[[x]], %[[y]]) fastmath<contract> : (!fir.vector<4:f32>, !fir.vector<4:f32>) -> !fir.vector<4:f32>
-! CHECK-FIR: fir.store %[[vmax]] to %{{[0-9]}} : !fir.ref<!fir.vector<4:f32>>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[vmax:.*]] = llvm.call @llvm.ppc.vsx.xvmaxsp(%[[x]], %[[y]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xf32>, vector<4xf32>) -> vector<4xf32>
-! CHECK-LLVMIR: llvm.store %[[vmax]], %{{[0-9]}} : !llvm.ptr<vector<4xf32>>
-
-! CHECK: %[[x:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[y:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[vmax:.*]] = call contract <4 x float> @llvm.ppc.vsx.xvmaxsp(<4 x float> %[[x]], <4 x float> %[[y]])
-! CHECK: store <4 x float> %[[vmax]], ptr %{{[0-9]}}, align 16
-end subroutine vec_max_testf32
-
-! CHECK-LABEL: vec_max_testf64
-subroutine vec_max_testf64(x, y)
-  vector(real(8)) :: vmax, x, y
-  vmax = vec_max(x, y)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[vmax:.*]] = fir.call @llvm.ppc.vsx.xvmaxdp(%[[x]], %[[y]]) fastmath<contract> : (!fir.vector<2:f64>, !fir.vector<2:f64>) -> !fir.vector<2:f64>
-! CHECK-FIR: fir.store %[[vmax]] to %{{[0-9]}} : !fir.ref<!fir.vector<2:f64>>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %[[vmax:.*]] = llvm.call @llvm.ppc.vsx.xvmaxdp(%[[x]], %[[y]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<2xf64>, vector<2xf64>) -> vector<2xf64>
-! CHECK-LLVMIR: llvm.store %[[vmax]], %{{[0-9]}} : !llvm.ptr<vector<2xf64>>
-
-! CHECK: %[[x:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[y:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[vmax:.*]] = call contract <2 x double> @llvm.ppc.vsx.xvmaxdp(<2 x double> %[[x]], <2 x double> %[[y]])
-! CHECK: store <2 x double> %[[vmax]], ptr %{{[0-9]}}, align 16
-end subroutine vec_max_testf64
-
-! CHECK-LABEL: vec_max_testi8
-subroutine vec_max_testi8(x, y)
-  vector(integer(1)) :: vmax, x, y
-  vmax = vec_max(x, y)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[vmax:.*]] = fir.call @llvm.ppc.altivec.vmaxsb(%[[x]], %[[y]]) fastmath<contract> : (!fir.vector<16:i8>, !fir.vector<16:i8>) -> !fir.vector<16:i8>
-! CHECK-FIR: fir.store %[[vmax]] to %{{[0-9]}} : !fir.ref<!fir.vector<16:i8>>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[vmax:.*]] = llvm.call @llvm.ppc.altivec.vmaxsb(%[[x]], %[[y]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<16xi8>, vector<16xi8>) -> vector<16xi8>
-! CHECK-LLVMIR: llvm.store %[[vmax]], %{{[0-9]}} : !llvm.ptr<vector<16xi8>>
-
-! CHECK: %[[x:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[y:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[vmax:.*]] = call <16 x i8> @llvm.ppc.altivec.vmaxsb(<16 x i8> %[[x]], <16 x i8> %[[y]])
-! CHECK: store <16 x i8> %[[vmax]], ptr %{{[0-9]}}, align 16
-end subroutine vec_max_testi8
-
-! CHECK-LABEL: vec_max_testi16
-subroutine vec_max_testi16(x, y)
-  vector(integer(2)) :: vmax, x, y
-  vmax = vec_max(x, y)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[vmax:.*]] = fir.call @llvm.ppc.altivec.vmaxsh(%[[x]], %[[y]]) fastmath<contract> : (!fir.vector<8:i16>, !fir.vector<8:i16>) -> !fir.vector<8:i16>
-! CHECK-FIR: fir.store %[[vmax]] to %{{[0-9]}} : !fir.ref<!fir.vector<8:i16>>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[vmax:.*]] = llvm.call @llvm.ppc.altivec.vmaxsh(%[[x]], %[[y]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<8xi16>, vector<8xi16>) -> vector<8xi16>
-! CHECK-LLVMIR: llvm.store %[[vmax]], %{{[0-9]}} : !llvm.ptr<vector<8xi16>>
-
-! CHECK: %[[x:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[y:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[vmax:.*]] = call <8 x i16> @llvm.ppc.altivec.vmaxsh(<8 x i16> %[[x]], <8 x i16> %[[y]])
-! CHECK: store <8 x i16> %[[vmax]], ptr %{{[0-9]}}, align 16
-end subroutine vec_max_testi16
-
-! CHECK-LABEL: vec_max_testi32
-subroutine vec_max_testi32(x, y)
-  vector(integer(4)) :: vmax, x, y
-  vmax = vec_max(x, y)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[vmax:.*]] = fir.call @llvm.ppc.altivec.vmaxsw(%[[x]], %[[y]]) fastmath<contract> : (!fir.vector<4:i32>, !fir.vector<4:i32>) -> !fir.vector<4:i32>
-! CHECK-FIR: fir.store %[[vmax]] to %{{[0-9]}} : !fir.ref<!fir.vector<4:i32>>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[vmax:.*]] = llvm.call @llvm.ppc.altivec.vmaxsw(%[[x]], %[[y]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-! CHECK-LLVMIR: llvm.store %[[vmax]], %{{[0-9]}} : !llvm.ptr<vector<4xi32>>
-
-! CHECK: %[[x:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[y:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[vmax:.*]] = call <4 x i32> @llvm.ppc.altivec.vmaxsw(<4 x i32> %[[x]], <4 x i32> %[[y]])
-! CHECK: store <4 x i32> %[[vmax]], ptr %{{[0-9]}}, align 16
-end subroutine vec_max_testi32
-
-! CHECK-LABEL: vec_max_testi64
-subroutine vec_max_testi64(x, y)
-  vector(integer(8)) :: vmax, x, y
-  vmax = vec_max(x, y)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[vmax:.*]] = fir.call @llvm.ppc.altivec.vmaxsd(%[[x]], %[[y]]) fastmath<contract> : (!fir.vector<2:i64>, !fir.vector<2:i64>) -> !fir.vector<2:i64>
-! CHECK-FIR: fir.store %[[vmax]] to %{{[0-9]}} : !fir.ref<!fir.vector<2:i64>>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[vmax:.*]] = llvm.call @llvm.ppc.altivec.vmaxsd(%[[x]], %[[y]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<2xi64>, vector<2xi64>) -> vector<2xi64>
-! CHECK-LLVMIR: llvm.store %[[vmax]], %{{[0-9]}} : !llvm.ptr<vector<2xi64>>
-
-! CHECK: %[[x:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[y:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[vmax:.*]] = call <2 x i64> @llvm.ppc.altivec.vmaxsd(<2 x i64> %[[x]], <2 x i64> %[[y]])
-! CHECK: store <2 x i64> %[[vmax]], ptr %{{[0-9]}}, align 16
-end subroutine vec_max_testi64
-
-! CHECK-LABEL: vec_max_testui8
-subroutine vec_max_testui8(x, y)
-  vector(unsigned(1)) :: vmax, x, y
-  vmax = vec_max(x, y)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[vmax:.*]] = fir.call @llvm.ppc.altivec.vmaxub(%[[x]], %[[y]]) fastmath<contract> : (!fir.vector<16:ui8>, !fir.vector<16:ui8>) -> !fir.vector<16:ui8>
-! CHECK-FIR: fir.store %[[vmax]] to %{{[0-9]}} : !fir.ref<!fir.vector<16:ui8>>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[vmax:.*]] = llvm.call @llvm.ppc.altivec.vmaxub(%[[x]], %[[y]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<16xi8>, vector<16xi8>) -> vector<16xi8>
-! CHECK-LLVMIR: llvm.store %[[vmax]], %{{[0-9]}} : !llvm.ptr<vector<16xi8>>
-
-! CHECK: %[[x:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[y:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[vmax:.*]] = call <16 x i8> @llvm.ppc.altivec.vmaxub(<16 x i8> %[[x]], <16 x i8> %[[y]])
-! CHECK: store <16 x i8> %[[vmax]], ptr %{{[0-9]}}, align 16
-end subroutine vec_max_testui8
-
-! CHECK-LABEL: vec_max_testui16
-subroutine vec_max_testui16(x, y)
-  vector(unsigned(2)) :: vmax, x, y
-  vmax = vec_max(x, y)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[vmax:.*]] = fir.call @llvm.ppc.altivec.vmaxuh(%[[x]], %[[y]]) fastmath<contract> : (!fir.vector<8:ui16>, !fir.vector<8:ui16>) -> !fir.vector<8:ui16>
-! CHECK-FIR: fir.store %[[vmax]] to %{{[0-9]}} : !fir.ref<!fir.vector<8:ui16>>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[vmax:.*]] = llvm.call @llvm.ppc.altivec.vmaxuh(%[[x]], %[[y]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<8xi16>, vector<8xi16>) -> vector<8xi16>
-! CHECK-LLVMIR: llvm.store %[[vmax]], %{{[0-9]}} : !llvm.ptr<vector<8xi16>>
-
-! CHECK: %[[x:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[y:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[vmax:.*]] = call <8 x i16> @llvm.ppc.altivec.vmaxuh(<8 x i16> %[[x]], <8 x i16> %[[y]])
-! CHECK: store <8 x i16> %[[vmax]], ptr %{{[0-9]}}, align 16
-end subroutine vec_max_testui16
-
-! CHECK-LABEL: vec_max_testui32
-subroutine vec_max_testui32(x, y)
-  vector(unsigned(4)) :: vmax, x, y
-  vmax = vec_max(x, y)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[vmax:.*]] = fir.call @llvm.ppc.altivec.vmaxuw(%[[x]], %[[y]]) fastmath<contract> : (!fir.vector<4:ui32>, !fir.vector<4:ui32>) -> !fir.vector<4:ui32>
-! CHECK-FIR: fir.store %[[vmax]] to %{{[0-9]}} : !fir.ref<!fir.vector<4:ui32>>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[vmax:.*]] = llvm.call @llvm.ppc.altivec.vmaxuw(%[[x]], %[[y]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-! CHECK-LLVMIR: llvm.store %[[vmax]], %{{[0-9]}} : !llvm.ptr<vector<4xi32>>
-
-! CHECK: %[[x:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[y:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[vmax:.*]] = call <4 x i32> @llvm.ppc.altivec.vmaxuw(<4 x i32> %[[x]], <4 x i32> %[[y]])
-! CHECK: store <4 x i32> %[[vmax]], ptr %{{[0-9]}}, align 16
-end subroutine vec_max_testui32
-
-! CHECK-LABEL: vec_max_testui64
-subroutine vec_max_testui64(x, y)
-  vector(unsigned(8)) :: vmax, x, y
-  vmax = vec_max(x, y)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[vmax:.*]] = fir.call @llvm.ppc.altivec.vmaxud(%[[x]], %[[y]]) fastmath<contract> : (!fir.vector<2:ui64>, !fir.vector<2:ui64>) -> !fir.vector<2:ui64>
-! CHECK-FIR: fir.store %[[vmax]] to %{{[0-9]}} : !fir.ref<!fir.vector<2:ui64>>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[vmax:.*]] = llvm.call @llvm.ppc.altivec.vmaxud(%[[x]], %[[y]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<2xi64>, vector<2xi64>) -> vector<2xi64>
-! CHECK-LLVMIR: llvm.store %[[vmax]], %{{[0-9]}} : !llvm.ptr<vector<2xi64>>
-
-! CHECK: %[[x:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[y:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[vmax:.*]] = call <2 x i64> @llvm.ppc.altivec.vmaxud(<2 x i64> %[[x]], <2 x i64> %[[y]])
-! CHECK: store <2 x i64> %[[vmax]], ptr %{{[0-9]}}, align 16
-end subroutine vec_max_testui64
-
-! vec_min
-
-! CHECK-LABEL: vec_min_testf32
-subroutine vec_min_testf32(x, y)
-  vector(real(4)) :: vmin, x, y
-  vmin = vec_min(x, y)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[vmin:.*]] = fir.call @llvm.ppc.vsx.xvminsp(%[[x]], %[[y]]) fastmath<contract> : (!fir.vector<4:f32>, !fir.vector<4:f32>) -> !fir.vector<4:f32>
-! CHECK-FIR: fir.store %[[vmin]] to %{{[0-9]}} : !fir.ref<!fir.vector<4:f32>>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[vmin:.*]] = llvm.call @llvm.ppc.vsx.xvminsp(%[[x]], %[[y]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xf32>, vector<4xf32>) -> vector<4xf32>
-! CHECK-LLVMIR: llvm.store %[[vmin]], %{{[0-9]}} : !llvm.ptr<vector<4xf32>>
-
-! CHECK: %[[x:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[y:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[vmin:.*]] = call contract <4 x float> @llvm.ppc.vsx.xvminsp(<4 x float> %[[x]], <4 x float> %[[y]])
-! CHECK: store <4 x float> %[[vmin]], ptr %{{[0-9]}}, align 16
-end subroutine vec_min_testf32
-
-! CHECK-LABEL: vec_min_testf64
-subroutine vec_min_testf64(x, y)
-  vector(real(8)) :: vmin, x, y
-  vmin = vec_min(x, y)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[vmin:.*]] = fir.call @llvm.ppc.vsx.xvmindp(%[[x]], %[[y]]) fastmath<contract> : (!fir.vector<2:f64>, !fir.vector<2:f64>) -> !fir.vector<2:f64>
-! CHECK-FIR: fir.store %[[vmin]] to %{{[0-9]}} : !fir.ref<!fir.vector<2:f64>>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %[[vmin:.*]] = llvm.call @llvm.ppc.vsx.xvmindp(%[[x]], %[[y]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<2xf64>, vector<2xf64>) -> vector<2xf64>
-! CHECK-LLVMIR: llvm.store %[[vmin]], %{{[0-9]}} : !llvm.ptr<vector<2xf64>>
-
-! CHECK: %[[x:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[y:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[vmin:.*]] = call contract <2 x double> @llvm.ppc.vsx.xvmindp(<2 x double> %[[x]], <2 x double> %[[y]])
-! CHECK: store <2 x double> %[[vmin]], ptr %{{[0-9]}}, align 16
-end subroutine vec_min_testf64
-
-! CHECK-LABEL: vec_min_testi8
-subroutine vec_min_testi8(x, y)
-  vector(integer(1)) :: vmin, x, y
-  vmin = vec_min(x, y)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[vmin:.*]] = fir.call @llvm.ppc.altivec.vminsb(%[[x]], %[[y]]) fastmath<contract> : (!fir.vector<16:i8>, !fir.vector<16:i8>) -> !fir.vector<16:i8>
-! CHECK-FIR: fir.store %[[vmin]] to %{{[0-9]}} : !fir.ref<!fir.vector<16:i8>>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[vmin:.*]] = llvm.call @llvm.ppc.altivec.vminsb(%[[x]], %[[y]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<16xi8>, vector<16xi8>) -> vector<16xi8>
-! CHECK-LLVMIR: llvm.store %[[vmin]], %{{[0-9]}} : !llvm.ptr<vector<16xi8>>
-
-! CHECK: %[[x:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[y:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[vmin:.*]] = call <16 x i8> @llvm.ppc.altivec.vminsb(<16 x i8> %[[x]], <16 x i8> %[[y]])
-! CHECK: store <16 x i8> %[[vmin]], ptr %{{[0-9]}}, align 16
-end subroutine vec_min_testi8
-
-! CHECK-LABEL: vec_min_testi16
-subroutine vec_min_testi16(x, y)
-  vector(integer(2)) :: vmin, x, y
-  vmin = vec_min(x, y)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[vmin:.*]] = fir.call @llvm.ppc.altivec.vminsh(%[[x]], %[[y]]) fastmath<contract> : (!fir.vector<8:i16>, !fir.vector<8:i16>) -> !fir.vector<8:i16>
-! CHECK-FIR: fir.store %[[vmin]] to %{{[0-9]}} : !fir.ref<!fir.vector<8:i16>>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[vmin:.*]] = llvm.call @llvm.ppc.altivec.vminsh(%[[x]], %[[y]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<8xi16>, vector<8xi16>) -> vector<8xi16>
-! CHECK-LLVMIR: llvm.store %[[vmin]], %{{[0-9]}} : !llvm.ptr<vector<8xi16>>
-
-! CHECK: %[[x:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[y:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[vmin:.*]] = call <8 x i16> @llvm.ppc.altivec.vminsh(<8 x i16> %[[x]], <8 x i16> %[[y]])
-! CHECK: store <8 x i16> %[[vmin]], ptr %{{[0-9]}}, align 16
-end subroutine vec_min_testi16
-
-! CHECK-LABEL: vec_min_testi32
-subroutine vec_min_testi32(x, y)
-  vector(integer(4)) :: vmin, x, y
-  vmin = vec_min(x, y)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[vmin:.*]] = fir.call @llvm.ppc.altivec.vminsw(%[[x]], %[[y]]) fastmath<contract> : (!fir.vector<4:i32>, !fir.vector<4:i32>) -> !fir.vector<4:i32>
-! CHECK-FIR: fir.store %[[vmin]] to %{{[0-9]}} : !fir.ref<!fir.vector<4:i32>>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[vmin:.*]] = llvm.call @llvm.ppc.altivec.vminsw(%[[x]], %[[y]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-! CHECK-LLVMIR: llvm.store %[[vmin]], %{{[0-9]}} : !llvm.ptr<vector<4xi32>>
-
-! CHECK: %[[x:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[y:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[vmin:.*]] = call <4 x i32> @llvm.ppc.altivec.vminsw(<4 x i32> %[[x]], <4 x i32> %[[y]])
-! CHECK: store <4 x i32> %[[vmin]], ptr %{{[0-9]}}, align 16
-end subroutine vec_min_testi32
-
-! CHECK-LABEL: vec_min_testi64
-subroutine vec_min_testi64(x, y)
-  vector(integer(8)) :: vmin, x, y
-  vmin = vec_min(x, y)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[vmin:.*]] = fir.call @llvm.ppc.altivec.vminsd(%[[x]], %[[y]]) fastmath<contract> : (!fir.vector<2:i64>, !fir.vector<2:i64>) -> !fir.vector<2:i64>
-! CHECK-FIR: fir.store %[[vmin]] to %{{[0-9]}} : !fir.ref<!fir.vector<2:i64>>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[vmin:.*]] = llvm.call @llvm.ppc.altivec.vminsd(%[[x]], %[[y]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<2xi64>, vector<2xi64>) -> vector<2xi64>
-! CHECK-LLVMIR: llvm.store %[[vmin]], %{{[0-9]}} : !llvm.ptr<vector<2xi64>>
-
-! CHECK: %[[x:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[y:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[vmin:.*]] = call <2 x i64> @llvm.ppc.altivec.vminsd(<2 x i64> %[[x]], <2 x i64> %[[y]])
-! CHECK: store <2 x i64> %[[vmin]], ptr %{{[0-9]}}, align 16
-end subroutine vec_min_testi64
-
-! CHECK-LABEL: vec_min_testui8
-subroutine vec_min_testui8(x, y)
-  vector(unsigned(1)) :: vmin, x, y
-  vmin = vec_min(x, y)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[vmin:.*]] = fir.call @llvm.ppc.altivec.vminub(%[[x]], %[[y]]) fastmath<contract> : (!fir.vector<16:ui8>, !fir.vector<16:ui8>) -> !fir.vector<16:ui8>
-! CHECK-FIR: fir.store %[[vmin]] to %{{[0-9]}} : !fir.ref<!fir.vector<16:ui8>>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[vmin:.*]] = llvm.call @llvm.ppc.altivec.vminub(%[[x]], %[[y]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<16xi8>, vector<16xi8>) -> vector<16xi8>
-! CHECK-LLVMIR: llvm.store %[[vmin]], %{{[0-9]}} : !llvm.ptr<vector<16xi8>>
-
-! CHECK: %[[x:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[y:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[vmin:.*]] = call <16 x i8> @llvm.ppc.altivec.vminub(<16 x i8> %[[x]], <16 x i8> %[[y]])
-! CHECK: store <16 x i8> %[[vmin]], ptr %{{[0-9]}}, align 16
-end subroutine vec_min_testui8
-
-! CHECK-LABEL: vec_min_testui16
-subroutine vec_min_testui16(x, y)
-  vector(unsigned(2)) :: vmin, x, y
-  vmin = vec_min(x, y)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[vmin:.*]] = fir.call @llvm.ppc.altivec.vminuh(%[[x]], %[[y]]) fastmath<contract> : (!fir.vector<8:ui16>, !fir.vector<8:ui16>) -> !fir.vector<8:ui16>
-! CHECK-FIR: fir.store %[[vmin]] to %{{[0-9]}} : !fir.ref<!fir.vector<8:ui16>>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[vmin:.*]] = llvm.call @llvm.ppc.altivec.vminuh(%[[x]], %[[y]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<8xi16>, vector<8xi16>) -> vector<8xi16>
-! CHECK-LLVMIR: llvm.store %[[vmin]], %{{[0-9]}} : !llvm.ptr<vector<8xi16>>
-
-! CHECK: %[[x:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[y:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[vmin:.*]] = call <8 x i16> @llvm.ppc.altivec.vminuh(<8 x i16> %[[x]], <8 x i16> %[[y]])
-! CHECK: store <8 x i16> %[[vmin]], ptr %{{[0-9]}}, align 16
-end subroutine vec_min_testui16
-
-! CHECK-LABEL: vec_min_testui32
-subroutine vec_min_testui32(x, y)
-  vector(unsigned(4)) :: vmin, x, y
-  vmin = vec_min(x, y)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[vmin:.*]] = fir.call @llvm.ppc.altivec.vminuw(%[[x]], %[[y]]) fastmath<contract> : (!fir.vector<4:ui32>, !fir.vector<4:ui32>) -> !fir.vector<4:ui32>
-! CHECK-FIR: fir.store %[[vmin]] to %{{[0-9]}} : !fir.ref<!fir.vector<4:ui32>>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[vmin:.*]] = llvm.call @llvm.ppc.altivec.vminuw(%[[x]], %[[y]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
-! CHECK-LLVMIR: llvm.store %[[vmin]], %{{[0-9]}} : !llvm.ptr<vector<4xi32>>
-
-! CHECK: %[[x:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[y:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[vmin:.*]] = call <4 x i32> @llvm.ppc.altivec.vminuw(<4 x i32> %[[x]], <4 x i32> %[[y]])
-! CHECK: store <4 x i32> %[[vmin]], ptr %{{[0-9]}}, align 16
-end subroutine vec_min_testui32
-
-! CHECK-LABEL: vec_min_testui64
-subroutine vec_min_testui64(x, y)
-  vector(unsigned(8)) :: vmin, x, y
-  vmin = vec_min(x, y)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[vmin:.*]] = fir.call @llvm.ppc.altivec.vminud(%[[x]], %[[y]]) fastmath<contract> : (!fir.vector<2:ui64>, !fir.vector<2:ui64>) -> !fir.vector<2:ui64>
-! CHECK-FIR: fir.store %[[vmin]] to %{{[0-9]}} : !fir.ref<!fir.vector<2:ui64>>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[vmin:.*]] = llvm.call @llvm.ppc.altivec.vminud(%[[x]], %[[y]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<2xi64>, vector<2xi64>) -> vector<2xi64>
-! CHECK-LLVMIR: llvm.store %[[vmin]], %{{[0-9]}} : !llvm.ptr<vector<2xi64>>
-
-! CHECK: %[[x:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[y:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[vmin:.*]] = call <2 x i64> @llvm.ppc.altivec.vminud(<2 x i64> %[[x]], <2 x i64> %[[y]])
-! CHECK: store <2 x i64> %[[vmin]], ptr %{{[0-9]}}, align 16
-end subroutine vec_min_testui64
-
-! vec_madd
-
-! CHECK-LABEL: vec_madd_testf32
-subroutine vec_madd_testf32(x, y, z)
-  vector(real(4)) :: vmsum, x, y, z
-  vmsum = vec_madd(x, y, z)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[z:.*]] = fir.load %arg2 : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[vmsum:.*]] = fir.call @llvm.fma.v4f32(%[[x]], %[[y]], %[[z]]) fastmath<contract> : (!fir.vector<4:f32>, !fir.vector<4:f32>, !fir.vector<4:f32>) -> !fir.vector<4:f32>
-! CHECK-FIR: fir.store %[[vmsum]] to %{{[0-9]}} : !fir.ref<!fir.vector<4:f32>>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[z:.*]] = llvm.load %arg2 : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[vmsum:.*]] = llvm.call @llvm.fma.v4f32(%[[x]], %[[y]], %[[z]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xf32>, vector<4xf32>, vector<4xf32>) -> vector<4xf32>
-! CHECK-LLVMIR: llvm.store %[[vmsum]], %{{[0-9]}} : !llvm.ptr<vector<4xf32>>
-
-! CHECK: %[[x:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[y:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[z:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[vmsum:.*]] = call contract <4 x float> @llvm.fma.v4f32(<4 x float> %[[x]], <4 x float> %[[y]], <4 x float> %[[z]])
-! CHECK: store <4 x float> %[[vmsum]], ptr %{{[0-9]}}, align 16
-end subroutine vec_madd_testf32
-
-! CHECK-LABEL: vec_madd_testf64
-subroutine vec_madd_testf64(x, y, z)
-  vector(real(8)) :: vmsum, x, y, z
-  vmsum = vec_madd(x, y, z)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[z:.*]] = fir.load %arg2 : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[vmsum:.*]] = fir.call @llvm.fma.v2f64(%[[x]], %[[y]], %[[z]]) fastmath<contract> : (!fir.vector<2:f64>, !fir.vector<2:f64>, !fir.vector<2:f64>) -> !fir.vector<2:f64>
-! CHECK-FIR: fir.store %[[vmsum]] to %{{[0-9]}} : !fir.ref<!fir.vector<2:f64>>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %[[z:.*]] = llvm.load %arg2 : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %[[vmsum:.*]] = llvm.call @llvm.fma.v2f64(%[[x]], %[[y]], %[[z]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<2xf64>, vector<2xf64>, vector<2xf64>) -> vector<2xf64>
-! CHECK-LLVMIR: llvm.store %[[vmsum]], %{{[0-9]}} : !llvm.ptr<vector<2xf64>>
-
-! CHECK: %[[x:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[y:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[z:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[vmsum:.*]] = call contract <2 x double> @llvm.fma.v2f64(<2 x double> %[[x]], <2 x double> %[[y]], <2 x double> %[[z]])
-! CHECK: store <2 x double> %[[vmsum]], ptr %{{[0-9]}}, align 16
-end subroutine vec_madd_testf64
-
-! vec_nmsub
-
-! CHECK-LABEL: vec_nmsub_testf32
-subroutine vec_nmsub_testf32(x, y, z)
-  vector(real(4)) :: vnmsub, x, y, z
-  vnmsub = vec_nmsub(x, y, z)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[z:.*]] = fir.load %arg2 : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[nmsub:.*]] = fir.call @llvm.ppc.fnmsub.v4f32(%[[x]], %[[y]], %[[z]]) fastmath<contract> : (!fir.vector<4:f32>, !fir.vector<4:f32>, !fir.vector<4:f32>) -> !fir.vector<4:f32>
-! CHECK-FIR: fir.store %[[nmsub]] to %{{[0-9]}} : !fir.ref<!fir.vector<4:f32>>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[z:.*]] = llvm.load %arg2 : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[vnmsub:.*]] = llvm.call @llvm.ppc.fnmsub.v4f32(%[[x]], %[[y]], %[[z]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xf32>, vector<4xf32>, vector<4xf32>) -> vector<4xf32>
-! CHECK-LLVMIR: llvm.store %[[vnmsub]], %{{[0-9]}} : !llvm.ptr<vector<4xf32>>
-
-! CHECK: %[[x:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[y:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[z:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[vnmsub:.*]] = call contract <4 x float> @llvm.ppc.fnmsub.v4f32(<4 x float> %[[x]], <4 x float> %[[y]], <4 x float> %[[z]])
-! CHECK: store <4 x float> %[[vnmsub]], ptr %{{[0-9]}}, align 16
-end subroutine vec_nmsub_testf32
-
-! CHECK-LABEL: vec_nmsub_testf64
-subroutine vec_nmsub_testf64(x, y, z)
-  vector(real(8)) :: vnmsub, x, y, z
-  vnmsub = vec_nmsub(x, y, z)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[z:.*]] = fir.load %arg2 : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[nmsub:.*]] = fir.call @llvm.ppc.fnmsub.v2f64(%[[x]], %[[y]], %[[z]]) fastmath<contract> : (!fir.vector<2:f64>, !fir.vector<2:f64>, !fir.vector<2:f64>) -> !fir.vector<2:f64>
-! CHECK-FIR: fir.store %[[nmsub]] to %{{[0-9]}} : !fir.ref<!fir.vector<2:f64>>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %[[z:.*]] = llvm.load %arg2 : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %[[vnmsub:.*]] = llvm.call @llvm.ppc.fnmsub.v2f64(%[[x]], %[[y]], %[[z]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<2xf64>, vector<2xf64>, vector<2xf64>) -> vector<2xf64>
-! CHECK-LLVMIR: llvm.store %[[vnmsub]], %{{[0-9]}} : !llvm.ptr<vector<2xf64>>
-
-! CHECK: %[[x:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[y:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[z:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[vnmsub:.*]] = call contract <2 x double> @llvm.ppc.fnmsub.v2f64(<2 x double> %[[x]], <2 x double> %[[y]], <2 x double> %[[z]])
-! CHECK: store <2 x double> %[[vnmsub]], ptr %{{[0-9]}}, align 16
-end subroutine vec_nmsub_testf64
-
-! vec_msub
-
-! CHECK-LABEL: vec_msub_testf32
-subroutine vec_msub_testf32(x, y, z)
-  vector(real(4)) :: vmsub, x, y, z
-  vmsub = vec_msub(x, y, z)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[z:.*]] = fir.load %arg2 : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[vy:.*]] = fir.convert %[[y]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[vz:.*]] = fir.convert %[[z]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[nz:.*]] = arith.negf %[[vz]] fastmath<contract> : vector<4xf32>
-! CHECK-FIR: %[[vmsub:.*]] = fir.call @llvm.fma.v4f32(%[[vx]], %[[vy]], %[[nz]]) fastmath<contract> : (vector<4xf32>, vector<4xf32>, vector<4xf32>) -> !fir.vector<4:f32>
-! CHECK-FIR: fir.store %[[vmsub]] to %{{[0-9]}} : !fir.ref<!fir.vector<4:f32>>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[z:.*]] = llvm.load %arg2 : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[nz:.*]] = llvm.fneg %[[z]]  {fastmathFlags = #llvm.fastmath<contract>} : vector<4xf32>
-! CHECK-LLVMIR: %[[vmsub:.*]] = llvm.call @llvm.fma.v4f32(%[[x]], %[[y]], %[[nz]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xf32>, vector<4xf32>, vector<4xf32>) -> vector<4xf32>
-! CHECK-LLVMIR: llvm.store %[[vmsub]], %{{[0-9]}} : !llvm.ptr<vector<4xf32>>
-
-! CHECK: %[[x:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[y:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[z:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[nz:.*]] = fneg contract <4 x float> %[[z]]
-! CHECK: %[[vmsub:.*]] = call contract <4 x float> @llvm.fma.v4f32(<4 x float> %[[x]], <4 x float> %[[y]], <4 x float> %[[nz]])
-! CHECK: store <4 x float> %[[vmsub]], ptr %{{[0-9]}}, align 16
-end subroutine vec_msub_testf32
-
-! CHECK-LABEL: vec_msub_testf64
-subroutine vec_msub_testf64(x, y, z)
-  vector(real(8)) :: vmsub, x, y, z
-  vmsub = vec_msub(x, y, z)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[z:.*]] = fir.load %arg2 : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! CHECK-FIR: %[[vy:.*]] = fir.convert %[[y]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! CHECK-FIR: %[[vz:.*]] = fir.convert %[[z]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! CHECK-FIR: %[[nz:.*]] = arith.negf %[[vz]] fastmath<contract> : vector<2xf64>
-! CHECK-FIR: %[[vmsub:.*]] = fir.call @llvm.fma.v2f64(%[[vx]], %[[vy]], %[[nz]]) fastmath<contract> : (vector<2xf64>, vector<2xf64>, vector<2xf64>) -> !fir.vector<2:f64>
-! CHECK-FIR: fir.store %[[vmsub]] to %{{[0-9]}} : !fir.ref<!fir.vector<2:f64>>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %[[z:.*]] = llvm.load %arg2 : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %[[nz:.*]] = llvm.fneg %[[z]]  {fastmathFlags = #llvm.fastmath<contract>} : vector<2xf64>
-! CHECK-LLVMIR: %[[vmsub:.*]] = llvm.call @llvm.fma.v2f64(%[[x]], %[[y]], %[[nz]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<2xf64>, vector<2xf64>, vector<2xf64>) -> vector<2xf64>
-! CHECK-LLVMIR: llvm.store %[[vmsub]], %{{[0-9]}} : !llvm.ptr<vector<2xf64>>
-
-! CHECK: %[[x:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[y:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[z:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[nz:.*]] = fneg contract <2 x double> %[[z]]
-! CHECK: %[[vmsub:.*]] = call contract <2 x double> @llvm.fma.v2f64(<2 x double> %[[x]], <2 x double> %[[y]], <2 x double> %[[nz]])
-! CHECK: store <2 x double> %[[vmsub]], ptr %{{[0-9]}}, align 16
-end subroutine vec_msub_testf64
-
-! vec_nmadd
-
-! CHECK-LABEL: vec_nmadd_testf32
-subroutine vec_nmadd_testf32(x, y, z)
-  vector(real(4)) :: vnmsum, x, y, z
-  vnmsum = vec_nmadd(x, y, z)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[z:.*]] = fir.load %arg2 : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[vy:.*]] = fir.convert %[[y]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[vz:.*]] = fir.convert %[[z]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[msum:.*]] = fir.call @llvm.fma.v4f32(%[[vx]], %[[vy]], %[[vz]]) fastmath<contract> : (vector<4xf32>, vector<4xf32>, vector<4xf32>) -> !fir.vector<4:f32>
-! CHECK-FIR: %[[vmsum:.*]] = fir.convert %[[msum]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[nmsum:.*]] = arith.negf %[[vmsum]] fastmath<contract> : vector<4xf32>
-! CHECK-FIR: %[[vnmsum:.*]] = fir.convert %[[nmsum]] : (vector<4xf32>) -> !fir.vector<4:f32>
-! CHECK-FIR: fir.store %[[vnmsum]] to %{{[0-9]}} : !fir.ref<!fir.vector<4:f32>>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[z:.*]] = llvm.load %arg2 : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[msum:.*]] = llvm.call @llvm.fma.v4f32(%[[x]], %[[y]], %[[z]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xf32>, vector<4xf32>, vector<4xf32>) -> vector<4xf32>
-! CHECK-LLVMIR: %[[vnmsum:.*]] = llvm.fneg %[[msum]]  {fastmathFlags = #llvm.fastmath<contract>} : vector<4xf32>
-! CHECK-LLVMIR: llvm.store %[[vnmsum]], %{{[0-9]}} : !llvm.ptr<vector<4xf32>>
-
-! CHECK: %[[x:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[y:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[z:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[msum:.*]] = call contract <4 x float> @llvm.fma.v4f32(<4 x float> %[[x]], <4 x float> %[[y]], <4 x float> %[[z]])
-! CHECK: %[[vnmsum:.*]] = fneg contract <4 x float> %[[msum]]
-! CHECK: store <4 x float> %[[vnmsum]], ptr %{{[0-9]}}, align 16
-end subroutine vec_nmadd_testf32
-
-! CHECK-LABEL: vec_nmadd_testf64
-subroutine vec_nmadd_testf64(x, y, z)
-  vector(real(8)) :: vnmsum, x, y, z
-  vnmsum = vec_nmadd(x, y, z)
-! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[z:.*]] = fir.load %arg2 : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[vx:.*]] = fir.convert %[[x]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! CHECK-FIR: %[[vy:.*]] = fir.convert %[[y]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! CHECK-FIR: %[[vz:.*]] = fir.convert %[[z]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! CHECK-FIR: %[[msum:.*]] = fir.call @llvm.fma.v2f64(%[[vx]], %[[vy]], %[[vz]]) fastmath<contract> : (vector<2xf64>, vector<2xf64>, vector<2xf64>) -> !fir.vector<2:f64>
-! CHECK-FIR: %[[vmsum:.*]] = fir.convert %[[msum]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! CHECK-FIR: %[[nmsum:.*]] = arith.negf %[[vmsum]] fastmath<contract> : vector<2xf64>
-! CHECK-FIR: %[[vnmsum:.*]] = fir.convert %[[nmsum]] : (vector<2xf64>) -> !fir.vector<2:f64>
-! CHECK-FIR: fir.store %[[vnmsum]] to %{{[0-9]}} : !fir.ref<!fir.vector<2:f64>>
-
-! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %[[z:.*]] = llvm.load %arg2 : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %[[msum:.*]] = llvm.call @llvm.fma.v2f64(%[[x]], %[[y]], %[[z]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<2xf64>, vector<2xf64>, vector<2xf64>) -> vector<2xf64>
-! CHECK-LLVMIR: %[[vnmsum:.*]] = llvm.fneg %[[msum]]  {fastmathFlags = #llvm.fastmath<contract>} : vector<2xf64>
-! CHECK-LLVMIR: llvm.store %[[vnmsum]], %{{[0-9]}} : !llvm.ptr<vector<2xf64>>
-
-! CHECK: %[[x:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[y:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[z:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
-! CHECK: %[[msum:.*]] = call contract <2 x double> @llvm.fma.v2f64(<2 x double> %[[x]], <2 x double> %[[y]], <2 x double> %[[z]])
-! CHECK: %[[vnmsum:.*]] = fneg contract <2 x double> %[[msum]]
-! CHECK: store <2 x double> %[[vnmsum]], ptr %{{[0-9]}}, align 16
-end subroutine vec_nmadd_testf64
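
A note on the pattern the deleted checks above encode: vec_msub(x, y, z)
lowers to llvm.fma(x, y, fneg z), and vec_nmadd(x, y, z) lowers to
fneg(llvm.fma(x, y, z)), each carrying the 'contract' fast-math flag. A
minimal scalar sketch of those two identities (illustrative only; the
function names are hypothetical and not part of the patch):

    pure function msub_scalar(x, y, z) result(r)
      real(8), intent(in) :: x, y, z
      real(8) :: r
      r = x * y - z      ! lowered as fma(x, y, -z)
    end function msub_scalar

    pure function nmadd_scalar(x, y, z) result(r)
      real(8), intent(in) :: x, y, z
      real(8) :: r
      r = -(x * y + z)   ! lowered as -fma(x, y, z)
    end function nmadd_scalar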

diff --git a/flang/test/Lower/PowerPC/ppc-vec_sel.f90 b/flang/test/Lower/PowerPC/ppc-vec_sel.f90
deleted file mode 100644
index 0802af5e77eee67..000000000000000
--- a/flang/test/Lower/PowerPC/ppc-vec_sel.f90
+++ /dev/null
@@ -1,492 +0,0 @@
-! RUN: bbc -emit-fir %s -o - | FileCheck --check-prefixes="CHECK-FIR" %s
-! RUN: %flang_fc1 -emit-fir %s -o - | fir-opt --fir-to-llvm-ir | FileCheck --check-prefixes="CHECK-LLVMIR" %s
-! RUN: %flang_fc1 -emit-llvm %s -o - | FileCheck --check-prefixes="CHECK" %s
-! REQUIRES: target=powerpc{{.*}}
-
-!----------------------
-! vec_sel
-!----------------------
-
-! CHECK-LABEL: vec_sel_testi1
-subroutine vec_sel_testi1(arg1, arg2, arg3)
-  vector(integer(1)) :: arg1, arg2, r
-  vector(unsigned(1)) :: arg3
-  r = vec_sel(arg1, arg2, arg3)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
-! CHECK-FIR: %[[arg3:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:i8>) -> vector<16xi8>
-! CHECK-FIR: %[[varg3:.*]] = fir.convert %[[arg3]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i8
-! CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i8 to vector<16xi8>
-! CHECK-FIR: %[[bcv1:.*]] = vector.bitcast %[[varg1]] : vector<16xi8> to vector<16xi8>
-! CHECK-FIR: %[[bcv2:.*]] = vector.bitcast %[[varg2]] : vector<16xi8> to vector<16xi8>
-! CHECK-FIR: %[[bcv3:.*]] = vector.bitcast %[[varg3]] : vector<16xi8> to vector<16xi8>
-! CHECK-FIR: %[[xor:.*]] = arith.xori %[[bcv3]], %[[vc]] : vector<16xi8>
-! CHECK-FIR: %[[and1:.*]] = arith.andi %[[bcv1]], %[[xor]] : vector<16xi8>
-! CHECK-FIR: %[[and2:.*]] = arith.andi %[[bcv2]], %[[bcv3]] : vector<16xi8>
-! CHECK-FIR: %[[or:.*]] = arith.ori %[[and1]], %[[and2]] : vector<16xi8>
-! CHECK-FIR: %[[bcor:.*]] = vector.bitcast %[[or]] : vector<16xi8> to vector<16xi8>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcor]] : (vector<16xi8>) -> !fir.vector<16:i8>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[arg3:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(dense<-1> : vector<16xi8>) : vector<16xi8>
-! CHECK-LLVMIR: %[[xor:.*]] = llvm.xor %[[arg3:.*]], %[[c]]  : vector<16xi8>
-! CHECK-LLVMIR: %[[and1:.*]] = llvm.and %[[arg1]], %[[xor]]  : vector<16xi8>
-! CHECK-LLVMIR: %[[and2:.*]] = llvm.and %[[arg2]], %[[arg3]]  : vector<16xi8>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.or %[[and1]], %[[and2]]  : vector<16xi8>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg3:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK:  %[[comp:.*]] = xor <16 x i8> %[[arg3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
-! CHECK:  %[[and1:.*]] = and <16 x i8> %[[arg1]], %[[comp]]
-! CHECK:  %[[and2:.*]] = and <16 x i8> %[[arg2]], %[[arg3]]
-! CHECK:  %{{[0-9]+}} = or <16 x i8> %[[and1]], %[[and2]]
-end subroutine vec_sel_testi1
-
-! CHECK-LABEL: vec_sel_testi2
-subroutine vec_sel_testi2(arg1, arg2, arg3)
-  vector(integer(2)) :: arg1, arg2, r
-  vector(unsigned(2)) :: arg3
-  r = vec_sel(arg1, arg2, arg3)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
-! CHECK-FIR: %[[arg3:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:i16>) -> vector<8xi16>
-! CHECK-FIR: %[[varg3:.*]] = fir.convert %[[arg3]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i8
-! CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i8 to vector<16xi8>
-! CHECK-FIR: %[[bcv1:.*]] = vector.bitcast %[[varg1]] : vector<8xi16> to vector<16xi8>
-! CHECK-FIR: %[[bcv2:.*]] = vector.bitcast %[[varg2]] : vector<8xi16> to vector<16xi8>
-! CHECK-FIR: %[[bcv3:.*]] = vector.bitcast %[[varg3]] : vector<8xi16> to vector<16xi8>
-! CHECK-FIR: %[[xor:.*]] = arith.xori %[[bcv3]], %[[vc]] : vector<16xi8>
-! CHECK-FIR: %[[and1:.*]] = arith.andi %[[bcv1]], %[[xor]] : vector<16xi8>
-! CHECK-FIR: %[[and2:.*]] = arith.andi %[[bcv2]], %[[bcv3]] : vector<16xi8>
-! CHECK-FIR: %[[or:.*]] = arith.ori %[[and1]], %[[and2]] : vector<16xi8>
-! CHECK-FIR: %[[bcor:.*]] = vector.bitcast %[[or]] : vector<16xi8> to vector<8xi16>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcor]] : (vector<8xi16>) -> !fir.vector<8:i16>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[arg3:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(dense<-1> : vector<16xi8>) : vector<16xi8>
-! CHECK-LLVMIR: %[[bc1:.*]] = llvm.bitcast %[[arg1]] : vector<8xi16> to vector<16xi8>
-! CHECK-LLVMIR: %[[bc2:.*]] = llvm.bitcast %[[arg2]] : vector<8xi16> to vector<16xi8>
-! CHECK-LLVMIR: %[[bc3:.*]] = llvm.bitcast %[[arg3]] : vector<8xi16> to vector<16xi8>
-! CHECK-LLVMIR: %[[xor:.*]] = llvm.xor %[[bc3:.*]], %[[c]]  : vector<16xi8>
-! CHECK-LLVMIR: %[[and1:.*]] = llvm.and %[[bc1]], %[[xor]]  : vector<16xi8>
-! CHECK-LLVMIR: %[[and2:.*]] = llvm.and %[[bc2]], %[[bc3]]  : vector<16xi8>
-! CHECK-LLVMIR: %[[or:.*]] = llvm.or %[[and1]], %[[and2]]  : vector<16xi8>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[or]] : vector<16xi8> to vector<8xi16>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg3:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[bc1:.*]] = bitcast <8 x i16> %5 to <16 x i8>
-! CHECK: %[[bc2:.*]] = bitcast <8 x i16> %6 to <16 x i8>
-! CHECK: %[[bc3:.*]] = bitcast <8 x i16> %7 to <16 x i8>
-! CHECK: %[[comp:.*]] = xor <16 x i8> %[[bc3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
-! CHECK: %[[and1:.*]] = and <16 x i8> %[[bc1]], %[[comp]]
-! CHECK: %[[and2:.*]] = and <16 x i8> %[[bc2]], %[[bc3]]
-! CHECK: %[[or:.*]] = or <16 x i8> %[[and1]], %[[and2]]
-! CHECK: %{{[0-9]+}} = bitcast <16 x i8> %[[or]] to <8 x i16>
-end subroutine vec_sel_testi2
-
-! CHECK-LABEL: vec_sel_testi4
-subroutine vec_sel_testi4(arg1, arg2, arg3)
-  vector(integer(4)) :: arg1, arg2, r
-  vector(unsigned(4)) :: arg3
-  r = vec_sel(arg1, arg2, arg3)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
-! CHECK-FIR: %[[arg3:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:i32>) -> vector<4xi32>
-! CHECK-FIR: %[[varg3:.*]] = fir.convert %[[arg3]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i8
-! CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i8 to vector<16xi8>
-! CHECK-FIR: %[[bcv1:.*]] = vector.bitcast %[[varg1]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %[[bcv2:.*]] = vector.bitcast %[[varg2]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %[[bcv3:.*]] = vector.bitcast %[[varg3]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %[[xor:.*]] = arith.xori %[[bcv3]], %[[vc]] : vector<16xi8>
-! CHECK-FIR: %[[and1:.*]] = arith.andi %[[bcv1]], %[[xor]] : vector<16xi8>
-! CHECK-FIR: %[[and2:.*]] = arith.andi %[[bcv2]], %[[bcv3]] : vector<16xi8>
-! CHECK-FIR: %[[or:.*]] = arith.ori %[[and1]], %[[and2]] : vector<16xi8>
-! CHECK-FIR: %[[bcor:.*]] = vector.bitcast %[[or]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcor]] : (vector<4xi32>) -> !fir.vector<4:i32>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg3:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(dense<-1> : vector<16xi8>) : vector<16xi8>
-! CHECK-LLVMIR: %[[bc1:.*]] = llvm.bitcast %[[arg1]] : vector<4xi32> to vector<16xi8>
-! CHECK-LLVMIR: %[[bc2:.*]] = llvm.bitcast %[[arg2]] : vector<4xi32> to vector<16xi8>
-! CHECK-LLVMIR: %[[bc3:.*]] = llvm.bitcast %[[arg3]] : vector<4xi32> to vector<16xi8>
-! CHECK-LLVMIR: %[[xor:.*]] = llvm.xor %[[bc3:.*]], %[[c]]  : vector<16xi8>
-! CHECK-LLVMIR: %[[and1:.*]] = llvm.and %[[bc1]], %[[xor]]  : vector<16xi8>
-! CHECK-LLVMIR: %[[and2:.*]] = llvm.and %[[bc2]], %[[bc3]]  : vector<16xi8>
-! CHECK-LLVMIR: %[[or:.*]] = llvm.or %[[and1]], %[[and2]]  : vector<16xi8>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[or]] : vector<16xi8> to vector<4xi32>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg3:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[bc1:.*]] = bitcast <4 x i32> %5 to <16 x i8>
-! CHECK: %[[bc2:.*]] = bitcast <4 x i32> %6 to <16 x i8>
-! CHECK: %[[bc3:.*]] = bitcast <4 x i32> %7 to <16 x i8>
-! CHECK: %[[comp:.*]] = xor <16 x i8> %[[bc3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
-! CHECK: %[[and1:.*]] = and <16 x i8> %[[bc1]], %[[comp]]
-! CHECK: %[[and2:.*]] = and <16 x i8> %[[bc2]], %[[bc3]]
-! CHECK: %[[or:.*]] = or <16 x i8> %[[and1]], %[[and2]]
-! CHECK: %{{[0-9]+}} = bitcast <16 x i8> %[[or]] to <4 x i32>
-end subroutine vec_sel_testi4
-
-! CHECK-LABEL: vec_sel_testi8
-subroutine vec_sel_testi8(arg1, arg2, arg3)
-  vector(integer(8)) :: arg1, arg2, r
-  vector(unsigned(8)) :: arg3
-  r = vec_sel(arg1, arg2, arg3)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:i64>>
-! CHECK-FIR: %[[arg3:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:i64>) -> vector<2xi64>
-! CHECK-FIR: %[[varg3:.*]] = fir.convert %[[arg3]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i8
-! CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i8 to vector<16xi8>
-! CHECK-FIR: %[[bcv1:.*]] = vector.bitcast %[[varg1]] : vector<2xi64> to vector<16xi8>
-! CHECK-FIR: %[[bcv2:.*]] = vector.bitcast %[[varg2]] : vector<2xi64> to vector<16xi8>
-! CHECK-FIR: %[[bcv3:.*]] = vector.bitcast %[[varg3]] : vector<2xi64> to vector<16xi8>
-! CHECK-FIR: %[[xor:.*]] = arith.xori %[[bcv3]], %[[vc]] : vector<16xi8>
-! CHECK-FIR: %[[and1:.*]] = arith.andi %[[bcv1]], %[[xor]] : vector<16xi8>
-! CHECK-FIR: %[[and2:.*]] = arith.andi %[[bcv2]], %[[bcv3]] : vector<16xi8>
-! CHECK-FIR: %[[or:.*]] = arith.ori %[[and1]], %[[and2]] : vector<16xi8>
-! CHECK-FIR: %[[bcor:.*]] = vector.bitcast %[[or]] : vector<16xi8> to vector<2xi64>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcor]] : (vector<2xi64>) -> !fir.vector<2:i64>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[arg3:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(dense<-1> : vector<16xi8>) : vector<16xi8>
-! CHECK-LLVMIR: %[[bc1:.*]] = llvm.bitcast %[[arg1]] : vector<2xi64> to vector<16xi8>
-! CHECK-LLVMIR: %[[bc2:.*]] = llvm.bitcast %[[arg2]] : vector<2xi64> to vector<16xi8>
-! CHECK-LLVMIR: %[[bc3:.*]] = llvm.bitcast %[[arg3]] : vector<2xi64> to vector<16xi8>
-! CHECK-LLVMIR: %[[xor:.*]] = llvm.xor %[[bc3:.*]], %[[c]]  : vector<16xi8>
-! CHECK-LLVMIR: %[[and1:.*]] = llvm.and %[[bc1]], %[[xor]]  : vector<16xi8>
-! CHECK-LLVMIR: %[[and2:.*]] = llvm.and %[[bc2]], %[[bc3]]  : vector<16xi8>
-! CHECK-LLVMIR: %[[or:.*]] = llvm.or %[[and1]], %[[and2]]  : vector<16xi8>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[or]] : vector<16xi8> to vector<2xi64>
-
-! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[arg3:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[bc1:.*]] = bitcast <2 x i64> %5 to <16 x i8>
-! CHECK: %[[bc2:.*]] = bitcast <2 x i64> %6 to <16 x i8>
-! CHECK: %[[bc3:.*]] = bitcast <2 x i64> %7 to <16 x i8>
-! CHECK: %[[comp:.*]] = xor <16 x i8> %[[bc3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
-! CHECK: %[[and1:.*]] = and <16 x i8> %[[bc1]], %[[comp]]
-! CHECK: %[[and2:.*]] = and <16 x i8> %[[bc2]], %[[bc3]]
-! CHECK: %[[or:.*]] = or <16 x i8> %[[and1]], %[[and2]]
-! CHECK: %{{[0-9]+}} = bitcast <16 x i8> %[[or]] to <2 x i64>
-end subroutine vec_sel_testi8
-
-! CHECK-LABEL: vec_sel_testu1
-subroutine vec_sel_testu1(arg1, arg2, arg3)
-  vector(unsigned(1)) :: arg1, arg2, r
-  vector(unsigned(1)) :: arg3
-  r = vec_sel(arg1, arg2, arg3)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[arg3:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[varg3:.*]] = fir.convert %[[arg3]] : (!fir.vector<16:ui8>) -> vector<16xi8>
-! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i8
-! CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i8 to vector<16xi8>
-! CHECK-FIR: %[[bcv1:.*]] = vector.bitcast %[[varg1]] : vector<16xi8> to vector<16xi8>
-! CHECK-FIR: %[[bcv2:.*]] = vector.bitcast %[[varg2]] : vector<16xi8> to vector<16xi8>
-! CHECK-FIR: %[[bcv3:.*]] = vector.bitcast %[[varg3]] : vector<16xi8> to vector<16xi8>
-! CHECK-FIR: %[[xor:.*]] = arith.xori %[[bcv3]], %[[vc]] : vector<16xi8>
-! CHECK-FIR: %[[and1:.*]] = arith.andi %[[bcv1]], %[[xor]] : vector<16xi8>
-! CHECK-FIR: %[[and2:.*]] = arith.andi %[[bcv2]], %[[bcv3]] : vector<16xi8>
-! CHECK-FIR: %[[or:.*]] = arith.ori %[[and1]], %[[and2]] : vector<16xi8>
-! CHECK-FIR: %[[bcor:.*]] = vector.bitcast %[[or]] : vector<16xi8> to vector<16xi8>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcor]] : (vector<16xi8>) -> !fir.vector<16:ui8>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[arg3:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(dense<-1> : vector<16xi8>) : vector<16xi8>
-! CHECK-LLVMIR: %[[xor:.*]] = llvm.xor %[[arg3:.*]], %[[c]]  : vector<16xi8>
-! CHECK-LLVMIR: %[[and1:.*]] = llvm.and %[[arg1]], %[[xor]]  : vector<16xi8>
-! CHECK-LLVMIR: %[[and2:.*]] = llvm.and %[[arg2]], %[[arg3]]  : vector<16xi8>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.or %[[and1:.*]], %[[and2]]  : vector<16xi8>
-
-! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK: %[[arg3:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
-! CHECK:  %[[comp:.*]] = xor <16 x i8> %[[arg3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
-! CHECK:  %[[and1:.*]] = and <16 x i8> %[[arg1]], %[[comp]]
-! CHECK:  %[[and2:.*]] = and <16 x i8> %[[arg2]], %[[arg3]]
-! CHECK:  %{{[0-9]+}} = or <16 x i8> %[[and1]], %[[and2]]
-end subroutine vec_sel_testu1
-
-! CHECK-LABEL: vec_sel_testu2
-subroutine vec_sel_testu2(arg1, arg2, arg3)
-  vector(unsigned(2)) :: arg1, arg2, r
-  vector(unsigned(2)) :: arg3
-  r = vec_sel(arg1, arg2, arg3)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[arg3:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[varg3:.*]] = fir.convert %[[arg3]] : (!fir.vector<8:ui16>) -> vector<8xi16>
-! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i8
-! CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i8 to vector<16xi8>
-! CHECK-FIR: %[[bcv1:.*]] = vector.bitcast %[[varg1]] : vector<8xi16> to vector<16xi8>
-! CHECK-FIR: %[[bcv2:.*]] = vector.bitcast %[[varg2]] : vector<8xi16> to vector<16xi8>
-! CHECK-FIR: %[[bcv3:.*]] = vector.bitcast %[[varg3]] : vector<8xi16> to vector<16xi8>
-! CHECK-FIR: %[[xor:.*]] = arith.xori %[[bcv3]], %[[vc]] : vector<16xi8>
-! CHECK-FIR: %[[and1:.*]] = arith.andi %[[bcv1]], %[[xor]] : vector<16xi8>
-! CHECK-FIR: %[[and2:.*]] = arith.andi %[[bcv2]], %[[bcv3]] : vector<16xi8>
-! CHECK-FIR: %[[or:.*]] = arith.ori %[[and1]], %[[and2]] : vector<16xi8>
-! CHECK-FIR: %[[bcor:.*]] = vector.bitcast %[[or]] : vector<16xi8> to vector<8xi16>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcor]] : (vector<8xi16>) -> !fir.vector<8:ui16>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[arg3:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(dense<-1> : vector<16xi8>) : vector<16xi8>
-! CHECK-LLVMIR: %[[bc1:.*]] = llvm.bitcast %[[arg1]] : vector<8xi16> to vector<16xi8>
-! CHECK-LLVMIR: %[[bc2:.*]] = llvm.bitcast %[[arg2]] : vector<8xi16> to vector<16xi8>
-! CHECK-LLVMIR: %[[bc3:.*]] = llvm.bitcast %[[arg3]] : vector<8xi16> to vector<16xi8>
-! CHECK-LLVMIR: %[[xor:.*]] = llvm.xor %[[bc3:.*]], %[[c]]  : vector<16xi8>
-! CHECK-LLVMIR: %[[and1:.*]] = llvm.and %[[bc1]], %[[xor]]  : vector<16xi8>
-! CHECK-LLVMIR: %[[and2:.*]] = llvm.and %[[bc2]], %[[bc3]]  : vector<16xi8>
-! CHECK-LLVMIR: %[[or:.*]] = llvm.or %[[and1:.*]], %[[and2]]  : vector<16xi8>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[or]] : vector<16xi8> to vector<8xi16>
-
-! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[arg3:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
-! CHECK: %[[bc1:.*]] = bitcast <8 x i16> %5 to <16 x i8>
-! CHECK: %[[bc2:.*]] = bitcast <8 x i16> %6 to <16 x i8>
-! CHECK: %[[bc3:.*]] = bitcast <8 x i16> %7 to <16 x i8>
-! CHECK: %[[comp:.*]] = xor <16 x i8> %[[bc3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
-! CHECK: %[[and1:.*]] = and <16 x i8> %[[bc1]], %[[comp]]
-! CHECK: %[[and2:.*]] = and <16 x i8> %[[bc2]], %[[bc3]]
-! CHECK: %[[or:.*]] = or <16 x i8> %[[and1]], %[[and2]]
-! CHECK: %{{[0-9]+}} = bitcast <16 x i8> %[[or]] to <8 x i16>
-end subroutine vec_sel_testu2
-
-! CHECK-LABEL: vec_sel_testu4
-subroutine vec_sel_testu4(arg1, arg2, arg3)
-  vector(unsigned(4)) :: arg1, arg2, r
-  vector(unsigned(4)) :: arg3
-  r = vec_sel(arg1, arg2, arg3)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[arg3:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[varg3:.*]] = fir.convert %[[arg3]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i8
-! CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i8 to vector<16xi8>
-! CHECK-FIR: %[[bcv1:.*]] = vector.bitcast %[[varg1]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %[[bcv2:.*]] = vector.bitcast %[[varg2]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %[[bcv3:.*]] = vector.bitcast %[[varg3]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %[[xor:.*]] = arith.xori %[[bcv3]], %[[vc]] : vector<16xi8>
-! CHECK-FIR: %[[and1:.*]] = arith.andi %[[bcv1]], %[[xor]] : vector<16xi8>
-! CHECK-FIR: %[[and2:.*]] = arith.andi %[[bcv2]], %[[bcv3]] : vector<16xi8>
-! CHECK-FIR: %[[or:.*]] = arith.ori %[[and1]], %[[and2]] : vector<16xi8>
-! CHECK-FIR: %[[bcor:.*]] = vector.bitcast %[[or]] : vector<16xi8> to vector<4xi32>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcor]] : (vector<4xi32>) -> !fir.vector<4:ui32>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[arg3:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(dense<-1> : vector<16xi8>) : vector<16xi8>
-! CHECK-LLVMIR: %[[bc1:.*]] = llvm.bitcast %[[arg1]] : vector<4xi32> to vector<16xi8>
-! CHECK-LLVMIR: %[[bc2:.*]] = llvm.bitcast %[[arg2]] : vector<4xi32> to vector<16xi8>
-! CHECK-LLVMIR: %[[bc3:.*]] = llvm.bitcast %[[arg3]] : vector<4xi32> to vector<16xi8>
-! CHECK-LLVMIR: %[[xor:.*]] = llvm.xor %[[bc3:.*]], %[[c]]  : vector<16xi8>
-! CHECK-LLVMIR: %[[and1:.*]] = llvm.and %[[bc1]], %[[xor]]  : vector<16xi8>
-! CHECK-LLVMIR: %[[and2:.*]] = llvm.and %[[bc2]], %[[bc3]]  : vector<16xi8>
-! CHECK-LLVMIR: %[[or:.*]] = llvm.or %[[and1]], %[[and2]]  : vector<16xi8>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[or]] : vector<16xi8> to vector<4xi32>
-
-! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[arg3:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[bc1:.*]] = bitcast <4 x i32> %5 to <16 x i8>
-! CHECK: %[[bc2:.*]] = bitcast <4 x i32> %6 to <16 x i8>
-! CHECK: %[[bc3:.*]] = bitcast <4 x i32> %7 to <16 x i8>
-! CHECK: %[[comp:.*]] = xor <16 x i8> %[[bc3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
-! CHECK: %[[and1:.*]] = and <16 x i8> %[[bc1]], %[[comp]]
-! CHECK: %[[and2:.*]] = and <16 x i8> %[[bc2]], %[[bc3]]
-! CHECK: %[[or:.*]] = or <16 x i8> %[[and1]], %[[and2]]
-! CHECK: %{{[0-9]+}} = bitcast <16 x i8> %[[or]] to <4 x i32>
-end subroutine vec_sel_testu4
-
-! CHECK-LABEL: vec_sel_testu8
-subroutine vec_sel_testu8(arg1, arg2, arg3)
-  vector(unsigned(8)) :: arg1, arg2, r
-  vector(unsigned(8)) :: arg3
-  r = vec_sel(arg1, arg2, arg3)
-  
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[arg3:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %[[varg3:.*]] = fir.convert %[[arg3]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i8
-! CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i8 to vector<16xi8>
-! CHECK-FIR: %[[bcv1:.*]] = vector.bitcast %[[varg1]] : vector<2xi64> to vector<16xi8>
-! CHECK-FIR: %[[bcv2:.*]] = vector.bitcast %[[varg2]] : vector<2xi64> to vector<16xi8>
-! CHECK-FIR: %[[bcv3:.*]] = vector.bitcast %[[varg3]] : vector<2xi64> to vector<16xi8>
-! CHECK-FIR: %[[xor:.*]] = arith.xori %[[bcv3]], %[[vc]] : vector<16xi8>
-! CHECK-FIR: %[[and1:.*]] = arith.andi %[[bcv1]], %[[xor]] : vector<16xi8>
-! CHECK-FIR: %[[and2:.*]] = arith.andi %[[bcv2]], %[[bcv3]] : vector<16xi8>
-! CHECK-FIR: %[[or:.*]] = arith.ori %[[and1]], %[[and2]] : vector<16xi8>
-! CHECK-FIR: %[[bcor:.*]] = vector.bitcast %[[or]] : vector<16xi8> to vector<2xi64>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcor]] : (vector<2xi64>) -> !fir.vector<2:ui64>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[arg3:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(dense<-1> : vector<16xi8>) : vector<16xi8>
-! CHECK-LLVMIR: %[[bc1:.*]] = llvm.bitcast %[[arg1]] : vector<2xi64> to vector<16xi8>
-! CHECK-LLVMIR: %[[bc2:.*]] = llvm.bitcast %[[arg2]] : vector<2xi64> to vector<16xi8>
-! CHECK-LLVMIR: %[[bc3:.*]] = llvm.bitcast %[[arg3]] : vector<2xi64> to vector<16xi8>
-! CHECK-LLVMIR: %[[xor:.*]] = llvm.xor %[[bc3:.*]], %[[c]]  : vector<16xi8>
-! CHECK-LLVMIR: %[[and1:.*]] = llvm.and %[[bc1]], %[[xor]]  : vector<16xi8>
-! CHECK-LLVMIR: %[[and2:.*]] = llvm.and %[[bc2]], %[[bc3]]  : vector<16xi8>
-! CHECK-LLVMIR: %[[or:.*]] = llvm.or %[[and1]], %[[and2]]  : vector<16xi8>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[or]] : vector<16xi8> to vector<2xi64>
-
-! CHECK: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[arg3:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[bc1:.*]] = bitcast <2 x i64> %5 to <16 x i8>
-! CHECK: %[[bc2:.*]] = bitcast <2 x i64> %6 to <16 x i8>
-! CHECK: %[[bc3:.*]] = bitcast <2 x i64> %7 to <16 x i8>
-! CHECK: %[[comp:.*]] = xor <16 x i8> %[[bc3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
-! CHECK: %[[and1:.*]] = and <16 x i8> %[[bc1]], %[[comp]]
-! CHECK: %[[and2:.*]] = and <16 x i8> %[[bc2]], %[[bc3]]
-! CHECK: %[[or:.*]] = or <16 x i8> %[[and1]], %[[and2]]
-! CHECK: %{{[0-9]+}} = bitcast <16 x i8> %[[or]] to <2 x i64>
-end subroutine vec_sel_testu8
-
-! CHECK-LABEL: vec_sel_testr4
-subroutine vec_sel_testr4(arg1, arg2, arg3)
-  vector(real(4)) :: arg1, arg2, r
-  vector(unsigned(4)) :: arg3
-  r = vec_sel(arg1, arg2, arg3)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
-! CHECK-FIR: %[[arg3:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<4:f32>) -> vector<4xf32>
-! CHECK-FIR: %[[varg3:.*]] = fir.convert %[[arg3]] : (!fir.vector<4:ui32>) -> vector<4xi32>
-! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i8
-! CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i8 to vector<16xi8>
-! CHECK-FIR: %[[bcv1:.*]] = vector.bitcast %[[varg1]] : vector<4xf32> to vector<16xi8>
-! CHECK-FIR: %[[bcv2:.*]] = vector.bitcast %[[varg2]] : vector<4xf32> to vector<16xi8>
-! CHECK-FIR: %[[bcv3:.*]] = vector.bitcast %[[varg3]] : vector<4xi32> to vector<16xi8>
-! CHECK-FIR: %[[xor:.*]] = arith.xori %[[bcv3]], %[[vc]] : vector<16xi8>
-! CHECK-FIR: %[[and1:.*]] = arith.andi %[[bcv1]], %[[xor]] : vector<16xi8>
-! CHECK-FIR: %[[and2:.*]] = arith.andi %[[bcv2]], %[[bcv3]] : vector<16xi8>
-! CHECK-FIR: %[[or:.*]] = arith.ori %[[and1]], %[[and2]] : vector<16xi8>
-! CHECK-FIR: %[[bcor:.*]] = vector.bitcast %[[or]] : vector<16xi8> to vector<4xf32>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcor]] : (vector<4xf32>) -> !fir.vector<4:f32>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xf32>>
-! CHECK-LLVMIR: %[[arg3:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(dense<-1> : vector<16xi8>) : vector<16xi8>
-! CHECK-LLVMIR: %[[bc1:.*]] = llvm.bitcast %[[arg1]] : vector<4xf32> to vector<16xi8>
-! CHECK-LLVMIR: %[[bc2:.*]] = llvm.bitcast %[[arg2]] : vector<4xf32> to vector<16xi8>
-! CHECK-LLVMIR: %[[bc3:.*]] = llvm.bitcast %[[arg3]] : vector<4xi32> to vector<16xi8>
-! CHECK-LLVMIR: %[[xor:.*]] = llvm.xor %[[bc3:.*]], %[[c]]  : vector<16xi8>
-! CHECK-LLVMIR: %[[and1:.*]] = llvm.and %[[bc1]], %[[xor]]  : vector<16xi8>
-! CHECK-LLVMIR: %[[and2:.*]] = llvm.and %[[bc2]], %[[bc3]]  : vector<16xi8>
-! CHECK-LLVMIR: %[[or:.*]] = llvm.or %[[and1]], %[[and2]]  : vector<16xi8>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[or]] : vector<16xi8> to vector<4xf32>
-
-! CHECK: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <4 x float>, ptr %{{.*}}, align 16
-! CHECK: %[[arg3:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
-! CHECK: %[[bc1:.*]] = bitcast <4 x float> %5 to <16 x i8>
-! CHECK: %[[bc2:.*]] = bitcast <4 x float> %6 to <16 x i8>
-! CHECK: %[[bc3:.*]] = bitcast <4 x i32> %7 to <16 x i8>
-! CHECK: %[[comp:.*]] = xor <16 x i8> %[[bc3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
-! CHECK: %[[and1:.*]] = and <16 x i8> %[[bc1]], %[[comp]]
-! CHECK: %[[and2:.*]] = and <16 x i8> %[[bc2]], %[[bc3]]
-! CHECK: %[[or:.*]] = or <16 x i8> %[[and1]], %[[and2]]
-! CHECK: %{{[0-9]+}} = bitcast <16 x i8> %[[or]] to <4 x float>
-end subroutine vec_sel_testr4
-
-! CHECK-LABEL: vec_sel_testr8
-subroutine vec_sel_testr8(arg1, arg2, arg3)
-  vector(real(8)) :: arg1, arg2, r
-  vector(unsigned(8)) :: arg3
-  r = vec_sel(arg1, arg2, arg3)
-
-! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
-! CHECK-FIR: %[[arg3:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:ui64>>
-! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! CHECK-FIR: %[[varg2:.*]] = fir.convert %[[arg2]] : (!fir.vector<2:f64>) -> vector<2xf64>
-! CHECK-FIR: %[[varg3:.*]] = fir.convert %[[arg3]] : (!fir.vector<2:ui64>) -> vector<2xi64>
-! CHECK-FIR: %[[c:.*]] = arith.constant -1 : i8
-! CHECK-FIR: %[[vc:.*]] = vector.broadcast %[[c]] : i8 to vector<16xi8>
-! CHECK-FIR: %[[bcv1:.*]] = vector.bitcast %[[varg1]] : vector<2xf64> to vector<16xi8>
-! CHECK-FIR: %[[bcv2:.*]] = vector.bitcast %[[varg2]] : vector<2xf64> to vector<16xi8>
-! CHECK-FIR: %[[bcv3:.*]] = vector.bitcast %[[varg3]] : vector<2xi64> to vector<16xi8>
-! CHECK-FIR: %[[xor:.*]] = arith.xori %[[bcv3]], %[[vc]] : vector<16xi8>
-! CHECK-FIR: %[[and1:.*]] = arith.andi %[[bcv1]], %[[xor]] : vector<16xi8>
-! CHECK-FIR: %[[and2:.*]] = arith.andi %[[bcv2]], %[[bcv3]] : vector<16xi8>
-! CHECK-FIR: %[[or:.*]] = arith.ori %[[and1]], %[[and2]] : vector<16xi8>
-! CHECK-FIR: %[[bcor:.*]] = vector.bitcast %[[or]] : vector<16xi8> to vector<2xf64>
-! CHECK-FIR: %{{[0-9]+}} = fir.convert %[[bcor]] : (vector<2xf64>) -> !fir.vector<2:f64>
-
-! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xf64>>
-! CHECK-LLVMIR: %[[arg3:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<2xi64>>
-! CHECK-LLVMIR: %[[c:.*]] = llvm.mlir.constant(dense<-1> : vector<16xi8>) : vector<16xi8>
-! CHECK-LLVMIR: %[[bc1:.*]] = llvm.bitcast %[[arg1]] : vector<2xf64> to vector<16xi8>
-! CHECK-LLVMIR: %[[bc2:.*]] = llvm.bitcast %[[arg2]] : vector<2xf64> to vector<16xi8>
-! CHECK-LLVMIR: %[[bc3:.*]] = llvm.bitcast %[[arg3]] : vector<2xi64> to vector<16xi8>
-! CHECK-LLVMIR: %[[xor:.*]] = llvm.xor %[[bc3:.*]], %[[c]]  : vector<16xi8>
-! CHECK-LLVMIR: %[[and1:.*]] = llvm.and %[[bc1]], %[[xor]]  : vector<16xi8>
-! CHECK-LLVMIR: %[[and2:.*]] = llvm.and %[[bc2]], %[[bc3]]  : vector<16xi8>
-! CHECK-LLVMIR: %[[or:.*]] = llvm.or %[[and1]], %[[and2]]  : vector<16xi8>
-! CHECK-LLVMIR: %{{[0-9]+}} = llvm.bitcast %[[or]] : vector<16xi8> to vector<2xf64>
-
-! CHECK: %[[arg1:.*]] = load <2 x double>, ptr %{{.*}}, align 16
-! CHECK: %[[arg2:.*]] = load <2 x double>, ptr %{{.*}}, align 16
-! CHECK: %[[arg3:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
-! CHECK: %[[bc1:.*]] = bitcast <2 x double> %5 to <16 x i8>
-! CHECK: %[[bc2:.*]] = bitcast <2 x double> %6 to <16 x i8>
-! CHECK: %[[bc3:.*]] = bitcast <2 x i64> %7 to <16 x i8>
-! CHECK: %[[comp:.*]] = xor <16 x i8> %[[bc3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
-! CHECK: %[[and1:.*]] = and <16 x i8> %[[bc1]], %[[comp]]
-! CHECK: %[[and2:.*]] = and <16 x i8> %[[bc2]], %[[bc3]]
-! CHECK: %[[or:.*]] = or <16 x i8> %[[and1]], %[[and2]]
-! CHECK: %{{[0-9]+}} = bitcast <16 x i8> %[[or]] to <2 x double>
-end subroutine vec_sel_testr8
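
Every vec_sel variant deleted above verifies the same lowering: the
operands are bitcast to <16 x i8> where the element type is not already
i8, the mask is complemented with an all-ones xor, and the result is
(arg1 AND NOT arg3) OR (arg2 AND arg3), bitcast back to the original
element type. A scalar Fortran sketch of that bitwise selection
(illustrative only; bit_select is a hypothetical name):

    pure function bit_select(a, b, mask) result(r)
      integer(4), intent(in) :: a, b, mask
      integer(4) :: r
      ! each result bit comes from a where the mask bit is 0,
      ! and from b where the mask bit is 1
      r = ior(iand(a, not(mask)), iand(b, mask))
    end function bit_select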

diff --git a/flang/test/Lower/PowerPC/ppc-vector-types.f90 b/flang/test/Lower/PowerPC/ppc-vector-types.f90
index 6dc038f884b6f33..be293f873ecb429 100644
--- a/flang/test/Lower/PowerPC/ppc-vector-types.f90
+++ b/flang/test/Lower/PowerPC/ppc-vector-types.f90
@@ -1,179 +1,115 @@
-! RUN: %flang_fc1 -emit-fir -o - %s | FileCheck %s -check-prefix=CHECK-FIR
-! RUN: %flang_fc1 -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK-LLVM
+! RUN: %flang_fc1 -flang-experimental-hlfir -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK-LLVM
 ! REQUIRES: target=powerpc{{.*}}
 
-! CHECK-FIR-LABEL: func.func @_QQmain()
 ! CHECK-LLVM-LABEL: define void @_QQmain
       program ppc_vec_unit
       implicit none
-      ! CHECK-FIR-DAG: %[[VI1:.*]] = fir.alloca !fir.vector<4:i32> {bindc_name = "vi1", uniq_name = "_QFEvi1"}
-      ! CHECK-FIR-DAG: %[[VI2:.*]] = fir.alloca !fir.vector<4:i32> {bindc_name = "vi2", uniq_name = "_QFEvi2"}
 
       ! CHECK-LLVM-DAG: %[[VI1:.*]] = alloca <4 x i32>, i64 1, align 16
       ! CHECK-LLVM-DAG: %[[VI2:.*]] = alloca <4 x i32>, i64 1, align 16
       vector(integer(4)) :: vi1, vi2
 
-      ! CHECK-FIR-DAG: %[[VR1:.*]] = fir.alloca !fir.vector<2:f64> {bindc_name = "vr1", uniq_name = "_QFEvr1"}
-      ! CHECK-FIR-DAG: %[[VR2:.*]] = fir.alloca !fir.vector<2:f64> {bindc_name = "vr2", uniq_name = "_QFEvr2"}
-
       ! CHECK-LLVM-DAG: %[[VR1:.*]] = alloca <2 x double>, i64 1, align 16
       ! CHECK-LLVM-DAG: %[[VR2:.*]] = alloca <2 x double>, i64 1, align 16
       vector(real(8)) :: vr1, vr2
 
-      ! CHECK-FIR-DAG: %[[VU1:.*]] = fir.alloca !fir.vector<8:ui16> {bindc_name = "vu1", uniq_name = "_QFEvu1"}
-      ! CHECK-FIR-DAG: %[[VU2:.*]] = fir.alloca !fir.vector<8:ui16> {bindc_name = "vu2", uniq_name = "_QFEvu2"}
-
       ! CHECK-LLVM-DAG: %[[VU1:.*]] = alloca <8 x i16>, i64 1, align 16
       ! CHECK-LLVM-DAG: %[[VU2:.*]] = alloca <8 x i16>, i64 1, align 16
       vector(unsigned(2)) :: vu1, vu2
 
-      ! CHECK-FIR-DAG: %[[VP1:.*]] = fir.alloca !fir.vector<256:i1> {bindc_name = "vp1", uniq_name = "_QFEvp1"}
-      ! CHECK-FIR-DAG: %[[VP2:.*]] = fir.alloca !fir.vector<256:i1> {bindc_name = "vp2", uniq_name = "_QFEvp2"}
-
       ! CHECK-LLVM-DAG: %[[VP1:.*]] = alloca <256 x i1>, i64 1, align 32
       ! CHECK-LLVM-DAG: %[[VP2:.*]] = alloca <256 x i1>, i64 1, align 32
       __vector_pair :: vp1, vp2
 
-      ! CHECK-FIR-DAG: %[[VQ1:.*]] = fir.address_of(@_QFEvq1) : !fir.ref<!fir.vector<512:i1>>
-      ! CHECK-FIR-DAG: %[[VQ2:.*]] = fir.address_of(@_QFEvq2) : !fir.ref<!fir.vector<512:i1>>
       __vector_quad :: vq1, vq2
 
-      ! CHECK-FIR: %[[RESI:.*]] = fir.call @_QFPtest_vec_integer_assign(%[[VI1]]){{.*}}: (!fir.ref<!fir.vector<4:i32>>) -> !fir.vector<4:i32>
       ! CHECK-LLVM: %[[RESI:.*]] = call <4 x i32> @_QFPtest_vec_integer_assign(ptr %[[VI1]])
       vi2 = test_vec_integer_assign(vi1)
-      ! CHECK-FIR-NEXT: fir.store %[[RESI]] to %[[VI2]] : !fir.ref<!fir.vector<4:i32>>
       ! CHECK-LLVM-NEXT: store <4 x i32> %[[RESI]], ptr %[[VI2]], align 16
 
-      ! CHECK-FIR-NEXT: %[[RESR:.*]] = fir.call @_QFPtest_vec_real_assign(%[[VR1]]){{.*}}: (!fir.ref<!fir.vector<2:f64>>) -> !fir.vector<2:f64>
       ! CHECK-LLVM-NEXT: %[[RESR:.*]] = call {{.*}}<2 x double> @_QFPtest_vec_real_assign(ptr %[[VR1]])
       vr2 = test_vec_real_assign(vr1)
-      ! CHECK-FIR-NEXT: fir.store %[[RESR]] to %[[VR2]] : !fir.ref<!fir.vector<2:f64>>
       ! CHECK-LLVM-NEXT: store <2 x double> %[[RESR]], ptr %[[VR2]], align 16
 
-      ! CHECK-FIR-NEXT: %[[RESU:.*]] = fir.call @_QFPtest_vec_unsigned_assign(%[[VU1]]){{.*}}: (!fir.ref<!fir.vector<8:ui16>>) -> !fir.vector<8:ui16>
       ! CHECK-LLVM-NEXT: %[[RESU:.*]] = call <8 x i16> @_QFPtest_vec_unsigned_assign(ptr %[[VU1]])
       vu2 = test_vec_unsigned_assign(vu1)
-      ! CHECK-FIR-NEXT: fir.store %[[RESU]] to %[[VU2]] : !fir.ref<!fir.vector<8:ui16>>
       ! CHECK-LLVM-NEXT: store <8 x i16> %[[RESU]], ptr %[[VU2]], align 16
 
-      ! CHECK-FIR-NEXT: %[[RESP:.*]] = fir.call @_QFPtest_vec_pair_assign(%[[VP1]]){{.*}}: (!fir.ref<!fir.vector<256:i1>>) -> !fir.vector<256:i1>
       ! CHECK-LLVM-NEXT: %[[RESP:.*]] = call <256 x i1> @_QFPtest_vec_pair_assign(ptr %[[VP1]])
       vp2 = test_vec_pair_assign(vp1)
-      ! CHECK-FIR-NEXT: fir.store %[[RESP]] to %[[VP2]] : !fir.ref<!fir.vector<256:i1>>
       ! CHECK-LLVM-NEXT: store <256 x i1> %[[RESP]], ptr %[[VP2]], align 32
 
-      ! CHECK-FIR-NEXT: %[[RESQ:.*]] = fir.call @_QFPtest_vec_quad_assign(%[[VQ1]]){{.*}}: (!fir.ref<!fir.vector<512:i1>>) -> !fir.vector<512:i1>
       ! CHECK-LLVM-NEXT: %[[RESQ:.*]] = call <512 x i1> @_QFPtest_vec_quad_assign(ptr @_QFEvq1)
       vq2 = test_vec_quad_assign(vq1)
-      ! CHECK-FIR-NEXT: fir.store %[[RESQ]] to %[[VQ2]] : !fir.ref<!fir.vector<512:i1>>
       ! CHECK-LLVM-NEXT: store <512 x i1> %[[RESQ]], ptr @_QFEvq2, align 64
 
       contains
-      ! CHECK-FIR-LABEL: func.func @_QFPtest_vec_integer_assign
       ! CHECK-LLVM-LABEL: define <4 x i32> @_QFPtest_vec_integer_assign
       function test_vec_integer_assign(arg1)
-        ! CHECK-FIR: %[[FUNC_RES:.*]] = fir.alloca !fir.vector<4:i32> {bindc_name = "test_vec_integer_assign"
         ! CHECK-LLVM: %[[FUNC_RES:.*]] = alloca <4 x i32>, i64 1, align 16
         vector(integer(4)) :: arg1, test_vec_integer_assign
 
-        ! CHECK-FIR-NEXT: %[[ARG0:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:i32>>
-        ! CHECK-FIR-NEXT: fir.store %[[ARG0]] to %[[FUNC_RES]] : !fir.ref<!fir.vector<4:i32>>
-
         ! CHECK-LLVM-NEXT: %[[ARG0:.*]] = load <4 x i32>, ptr %0, align 16
         ! CHECK-LLVM-NEXT: store <4 x i32> %[[ARG0]], ptr %[[FUNC_RES]], align 16
 
         test_vec_integer_assign = arg1
-        ! CHECK-FIR-NEXT: %[[RET:.*]] = fir.load %[[FUNC_RES]] : !fir.ref<!fir.vector<4:i32>>
-        ! CHECK-FIR-NEXT: return %[[RET]] : !fir.vector<4:i32>
 
         ! CHECK-LLVM-NEXT: %[[RET:.*]] = load <4 x i32>, ptr %[[FUNC_RES]], align 16
         ! CHECK-LLVM-NEXT: ret <4 x i32> %[[RET]]
       end function test_vec_integer_assign
 
-      ! CHECK-FIR-LABEL: func.func @_QFPtest_vec_real_assign
       ! CHECK-LLVM-LABEL: define <2 x double> @_QFPtest_vec_real_assign
       function test_vec_real_assign(arg1)
-        ! CHECK-FIR: %[[FUNC_RES:.*]] = fir.alloca !fir.vector<2:f64> {bindc_name = "test_vec_real_assign"
         ! CHECK-LLVM: %[[FUNC_RES:.*]] = alloca <2 x double>, i64 1, align 16
         vector(real(8)) :: arg1, test_vec_real_assign
 
-        ! CHECK-FIR-NEXT: %[[ARG0:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<2:f64>>
-        ! CHECK-FIR-NEXT: fir.store %[[ARG0]] to %[[FUNC_RES]] : !fir.ref<!fir.vector<2:f64>>
-
         ! CHECK-LLVM-NEXT: %[[ARG0:.*]] = load <2 x double>, ptr %0, align 16
         ! CHECK-LLVM-NEXT: store <2 x double> %[[ARG0]], ptr %[[FUNC_RES]], align 16
 
         test_vec_real_assign = arg1
 
-        ! CHECK-FIR-NEXT: %[[RET:.*]] = fir.load %[[FUNC_RES]] : !fir.ref<!fir.vector<2:f64>>
-        ! CHECK-FIR-NEXT: return %[[RET]] : !fir.vector<2:f64>
-
         ! CHECK-LLVM-NEXT: %[[RET:.*]] = load <2 x double>, ptr %[[FUNC_RES]], align 16
         ! CHECK-LLVM-NEXT: ret <2 x double> %[[RET]]
       end function test_vec_real_assign
 
-      ! CHECK-FIR-LABEL: func.func @_QFPtest_vec_unsigned_assign
       ! CHECK-LLVM-LABEL: define <8 x i16> @_QFPtest_vec_unsigned_assign
       function test_vec_unsigned_assign(arg1)
-        ! CHECK-FIR: %[[FUNC_RES:.*]] = fir.alloca !fir.vector<8:ui16> {bindc_name = "test_vec_unsigned_assign"
         ! CHECK-LLVM: %[[FUNC_RES:.*]] = alloca <8 x i16>, i64 1, align 16
         vector(unsigned(2)) :: arg1, test_vec_unsigned_assign
 
-        ! CHECK-FIR-NEXT: %[[ARG0:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<8:ui16>>
-        ! CHECK-FIR-NEXT: fir.store %[[ARG0]] to %[[FUNC_RES]] : !fir.ref<!fir.vector<8:ui16>>
-
         ! CHECK-LLVM-NEXT: %[[ARG0:.*]] = load <8 x i16>, ptr %0, align 16
         ! CHECK-LLVM-NEXT: store <8 x i16> %[[ARG0]], ptr %[[FUNC_RES]], align 16
 
         test_vec_unsigned_assign = arg1
 
-        ! CHECK-FIR-NEXT: %[[RET:.*]] = fir.load %[[FUNC_RES]] : !fir.ref<!fir.vector<8:ui16>>
-        ! CHECK-FIR-NEXT: return %[[RET]] : !fir.vector<8:ui16>
-
         ! CHECK-LLVM-NEXT: %[[RET:.*]] = load <8 x i16>, ptr %[[FUNC_RES]], align 16
         ! CHECK-LLVM-NEXT: ret <8 x i16> %[[RET]]
       end function test_vec_unsigned_assign
 
-      ! CHECK-FIR-LABEL: func.func @_QFPtest_vec_pair_assign
       ! CHECK-LLVM-LABEL: define <256 x i1> @_QFPtest_vec_pair_assign
       function test_vec_pair_assign(arg1)
-        ! CHECK-FIR: %[[FUNC_RES:.*]] = fir.alloca !fir.vector<256:i1> {bindc_name = "test_vec_pair_assign"
         ! CHECK-LLVM: %[[FUNC_RES:.*]] = alloca <256 x i1>, i64 1, align 32
         __vector_pair :: arg1, test_vec_pair_assign
 
-        ! CHECK-FIR-NEXT: %[[ARG0:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<256:i1>>
-        ! CHECK-FIR-NEXT: fir.store %[[ARG0]] to %[[FUNC_RES]] : !fir.ref<!fir.vector<256:i1>>
-
         ! CHECK-LLVM-NEXT: %[[ARG0:.*]] = load <256 x i1>, ptr %0, align 32
         ! CHECK-LLVM-NEXT: store <256 x i1> %[[ARG0]], ptr %[[FUNC_RES]], align 32
 
         test_vec_pair_assign = arg1
 
-        ! CHECK-FIR-NEXT: %[[RET:.*]] = fir.load %[[FUNC_RES]] : !fir.ref<!fir.vector<256:i1>>
-        ! CHECK-FIR-NEXT: return %[[RET]] : !fir.vector<256:i1>
-
         ! CHECK-LLVM-NEXT: %[[RET:.*]] = load <256 x i1>, ptr %[[FUNC_RES]], align 32
         ! CHECK-LLVM-NEXT: ret <256 x i1> %[[RET]]
       end function test_vec_pair_assign
 
-      ! CHECK-FIR-LABEL: func.func @_QFPtest_vec_quad_assign
       ! CHECK-LLVM-LABEL: define <512 x i1> @_QFPtest_vec_quad_assign
       function test_vec_quad_assign(arg1)
-        ! CHECK-FIR: %[[FUNC_RES:.*]] = fir.alloca !fir.vector<512:i1> {bindc_name = "test_vec_quad_assign"
         ! CHECK-LLVM: %[[FUNC_RES:.*]] = alloca <512 x i1>, i64 1, align 64
         __vector_quad :: arg1, test_vec_quad_assign
 
-        ! CHECK-FIR-NEXT: %[[ARG0:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<512:i1>>
-        ! CHECK-FIR-NEXT: fir.store %[[ARG0]] to %[[FUNC_RES]] : !fir.ref<!fir.vector<512:i1>>
-
         ! CHECK-LLVM-NEXT: %[[ARG0:.*]] = load <512 x i1>, ptr %0, align 64
         ! CHECK-LLVM-NEXT: store <512 x i1> %[[ARG0]], ptr %[[FUNC_RES]], align 64
 
         test_vec_quad_assign = arg1
 
-        ! CHECK-FIR-NEXT: %[[RET:.*]] = fir.load %[[FUNC_RES]] : !fir.ref<!fir.vector<512:i1>>
-        ! CHECK-FIR-NEXT: return %[[RET]] : !fir.vector<512:i1>
-
         ! CHECK-LLVM-NEXT: %[[RET:.*]] = load <512 x i1>, ptr %[[FUNC_RES]], align 64
         ! CHECK-LLVM-NEXT: ret <512 x i1> %[[RET]]
       end function test_vec_quad_assign
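
The retained CHECK-LLVM lines pin down how each PowerPC vector type maps
to an LLVM type and alignment. Read off the alloca and store checks
above, the mapping is as follows (declaration sketch, illustrative only;
it assumes flang's PowerPC vector extensions on a powerpc target):

    subroutine type_mapping_demo
      implicit none
      vector(integer(4))  :: vi   ! <4 x i32>,    align 16
      vector(real(8))     :: vr   ! <2 x double>, align 16
      vector(unsigned(2)) :: vu   ! <8 x i16>,    align 16
      __vector_pair       :: vp   ! <256 x i1>,   align 32
      __vector_quad       :: vq   ! <512 x i1>,   align 64
    end subroutine type_mapping_demo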
