[clang] 72d23a2 - [RISCV] Support LMUL!=1 for __attribute__((riscv_rvv_vector_bits(N)))

Craig Topper via cfe-commits cfe-commits at lists.llvm.org
Thu Jun 8 09:14:07 PDT 2023


Author: Craig Topper
Date: 2023-06-08T09:13:43-07:00
New Revision: 72d23a2ec1b98c26aa3a85b83118d969ae286928

URL: https://github.com/llvm/llvm-project/commit/72d23a2ec1b98c26aa3a85b83118d969ae286928
DIFF: https://github.com/llvm/llvm-project/commit/72d23a2ec1b98c26aa3a85b83118d969ae286928.diff

LOG: [RISCV] Support LMUL!=1 for __attribute__((riscv_rvv_vector_bits(N)))

The first patch supported only LMUL=1 types. This patch supports
LMUL!=1.

LMUL is a length multiplier that allows multiple vector registers to
be treated as one large register or a fraction of a single vector
register. Supported values for LMUL are 1/8, 1/4, 1/2, 1, 2, 4, and 8.

An LMUL=2 type will be twice as large as an LMUL=1 type. An LMUL=1/2
type will be half the size of an LMUL=1 type.

Type name with "m2" is LMUL=2, "m4" is LMUL=4.
Type name with "mf2" is LMUL=1/2, "mf4" is LMUL=1/4.

For the LMUL!=1 types the user will need to scale __riscv_v_fixed_vlen
by the LMUL before passing to the attribute.

Reviewed By: aaron.ballman

Differential Revision: https://reviews.llvm.org/D150926

Added: 
    

Modified: 
    clang/include/clang/Basic/AttrDocs.td
    clang/include/clang/Basic/DiagnosticSemaKinds.td
    clang/lib/AST/ASTContext.cpp
    clang/lib/AST/Type.cpp
    clang/lib/Sema/SemaType.cpp
    clang/test/CodeGen/attr-riscv-rvv-vector-bits-codegen.c
    clang/test/CodeGen/attr-riscv-rvv-vector-bits-types.c
    clang/test/Driver/riscv-rvv-vector-bits.c
    clang/test/Sema/attr-riscv-rvv-vector-bits.c

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index 1c4198cc8d547..5bf3333972b13 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -2340,11 +2340,14 @@ unsupported for sizeless types.
 
 The attribute can be attached to a single RVV vector (such as ``vint8m1_t``).
 The attribute will be rejected unless
-``N==__riscv_v_fixed_vlen``, the implementation defined feature macro that
+``N==(__riscv_v_fixed_vlen*LMUL)``, the implementation defined feature macro that
 is enabled under the ``-mrvv-vector-bits`` flag. ``__riscv_v_fixed_vlen`` can
 only be a power of 2 between 64 and 65536.
 
-Only ``*m1_t`` (LMUL=1) types are supported at this time.
+For types where LMUL!=1, ``__riscv_v_fixed_vlen`` needs to be scaled by the LMUL
+of the type before passing to the attribute.
+
+``vbool*_t`` types are not supported at this time.
 }];
 }
 

diff  --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 1d4743a43c052..43fa732217a48 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -3078,8 +3078,8 @@ def err_attribute_riscv_rvv_bits_unsupported : Error<
   "%0 is only supported when '-mrvv-vector-bits=<bits>' is specified with a "
   "value of \"zvl\" or a power 2 in the range [64,65536]">;
 def err_attribute_bad_rvv_vector_size : Error<
-  "invalid RVV vector size '%0', must match value set by "
-  "'-mrvv-vector-bits' ('%1')">;
+  "invalid RVV vector size '%0', expected size is '%1' based on LMUL of type "
+  "and '-mrvv-vector-bits'">;
 def err_attribute_invalid_rvv_type : Error<
   "%0 attribute applied to non-RVV type %1">;
 def err_attribute_requires_positive_integer : Error<

diff  --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index b7d9c3cc46e53..5bb739f2c4c8a 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -9579,11 +9579,8 @@ bool ASTContext::areCompatibleRVVTypes(QualType FirstType,
   auto IsValidCast = [this](QualType FirstType, QualType SecondType) {
     if (const auto *BT = FirstType->getAs<BuiltinType>()) {
       if (const auto *VT = SecondType->getAs<VectorType>()) {
-        if (VT->getVectorKind() == VectorType::RVVFixedLengthDataVector)
-          return FirstType->isRVVVLSBuiltinType() &&
-                 VT->getElementType().getCanonicalType() ==
-                     FirstType->getRVVEltType(*this);
-        if (VT->getVectorKind() == VectorType::GenericVector)
+        if (VT->getVectorKind() == VectorType::RVVFixedLengthDataVector ||
+            VT->getVectorKind() == VectorType::GenericVector)
           return FirstType->isRVVVLSBuiltinType() &&
                  getTypeSize(SecondType) == getRVVTypeSize(*this, BT) &&
                  hasSameType(VT->getElementType(),

diff  --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp
index 82ac4aef0b721..167091593a592 100644
--- a/clang/lib/AST/Type.cpp
+++ b/clang/lib/AST/Type.cpp
@@ -2448,10 +2448,9 @@ QualType Type::getSveEltType(const ASTContext &Ctx) const {
 bool Type::isRVVVLSBuiltinType() const {
   if (const BuiltinType *BT = getAs<BuiltinType>()) {
     switch (BT->getKind()) {
-    // FIXME: Support more than LMUL 1.
 #define RVV_VECTOR_TYPE(Name, Id, SingletonId, NumEls, ElBits, NF, IsSigned, IsFP) \
     case BuiltinType::Id: \
-      return NF == 1 && (NumEls * ElBits) == llvm::RISCV::RVVBitsPerBlock;
+      return NF == 1;
 #include "clang/Basic/RISCVVTypes.def"
     default:
       return false;

diff  --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
index f844048889b50..039082ecb29ba 100644
--- a/clang/lib/Sema/SemaType.cpp
+++ b/clang/lib/Sema/SemaType.cpp
@@ -40,7 +40,6 @@
 #include "llvm/ADT/SmallString.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/TargetParser/RISCVTargetParser.h"
 #include <bitset>
 #include <optional>
 
@@ -8345,9 +8344,10 @@ static void HandleRISCVRVVVectorBitsTypeAttr(QualType &CurType,
   unsigned MinElts = Info.EC.getKnownMinValue();
 
   // The attribute vector size must match -mrvv-vector-bits.
-  if (VecSize != VScale->first * MinElts * EltSize) {
+  unsigned ExpectedSize = VScale->first * MinElts * EltSize;
+  if (VecSize != ExpectedSize) {
     S.Diag(Attr.getLoc(), diag::err_attribute_bad_rvv_vector_size)
-        << VecSize << VScale->first * llvm::RISCV::RVVBitsPerBlock;
+        << VecSize << ExpectedSize;
     Attr.setInvalid();
     return;
   }

diff  --git a/clang/test/CodeGen/attr-riscv-rvv-vector-bits-codegen.c b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-codegen.c
index 5268117c5a2a3..bd120c4bcbf5c 100644
--- a/clang/test/CodeGen/attr-riscv-rvv-vector-bits-codegen.c
+++ b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-codegen.c
@@ -16,9 +16,22 @@ typedef __rvv_uint64m1_t vuint64m1_t;
 typedef __rvv_float32m1_t vfloat32m1_t;
 typedef __rvv_float64m1_t vfloat64m1_t;
 
+typedef __rvv_int8m2_t vint8m2_t;
+typedef __rvv_uint8m2_t vuint8m2_t;
+typedef __rvv_int16m2_t vint16m2_t;
+typedef __rvv_uint16m2_t vuint16m2_t;
+typedef __rvv_int32m2_t vint32m2_t;
+typedef __rvv_uint32m2_t vuint32m2_t;
+typedef __rvv_int64m2_t vint64m2_t;
+typedef __rvv_uint64m2_t vuint64m2_t;
+typedef __rvv_float32m2_t vfloat32m2_t;
+typedef __rvv_float64m2_t vfloat64m2_t;
+
 typedef vint32m1_t fixed_int32m1_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen)));
+typedef vint32m2_t fixed_int32m2_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 2)));
 
 fixed_int32m1_t global_vec;
+fixed_int32m2_t global_vec_m2;
 
 // CHECK-LABEL: @test_ptr_to_global(
 // CHECK-NEXT:  entry:
@@ -75,3 +88,59 @@ fixed_int32m1_t array_arg(fixed_int32m1_t arr[]) {
 fixed_int32m1_t test_cast(vint32m1_t vec) {
   return __riscv_vadd(global_vec, vec, __riscv_v_fixed_vlen/32);
 }
+
+// CHECK-LABEL: @test_ptr_to_global_m2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <16 x i32>, align 8
+// CHECK-NEXT:    [[GLOBAL_VEC_PTR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr @global_vec_m2, ptr [[GLOBAL_VEC_PTR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[GLOBAL_VEC_PTR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr [[TMP0]], align 8
+// CHECK-NEXT:    store <16 x i32> [[TMP1]], ptr [[RETVAL]], align 8
+// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr [[RETVAL]], align 8
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v16i32(<vscale x 2 x i32> undef, <16 x i32> [[TMP2]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32m2_t test_ptr_to_global_m2() {
+  fixed_int32m2_t *global_vec_ptr;
+  global_vec_ptr = &global_vec_m2;
+  return *global_vec_ptr;
+}
+
+//
+// Test casting pointer from fixed-length array to scalable vector.
+// CHECK-LABEL: @array_arg_m2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <16 x i32>, align 8
+// CHECK-NEXT:    [[ARR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[ARR:%.*]], ptr [[ARR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR]], align 8
+// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds <16 x i32>, ptr [[TMP0]], i64 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr [[ARRAYIDX]], align 8
+// CHECK-NEXT:    store <16 x i32> [[TMP1]], ptr [[RETVAL]], align 8
+// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr [[RETVAL]], align 8
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v16i32(<vscale x 2 x i32> undef, <16 x i32> [[TMP2]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32m2_t array_arg_m2(fixed_int32m2_t arr[]) {
+  return arr[0];
+}
+
+// CHECK-LABEL: @test_cast_m2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <16 x i32>, align 8
+// CHECK-NEXT:    [[VEC_ADDR:%.*]] = alloca <vscale x 4 x i32>, align 4
+// CHECK-NEXT:    store <vscale x 4 x i32> [[VEC:%.*]], ptr [[VEC_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr @global_vec_m2, align 8
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[TMP0]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = load <vscale x 4 x i32>, ptr [[VEC_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32.i64(<vscale x 4 x i32> poison, <vscale x 4 x i32> [[CASTSCALABLESVE]], <vscale x 4 x i32> [[TMP1]], i64 16)
+// CHECK-NEXT:    [[CASTFIXEDSVE:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[TMP2]], i64 0)
+// CHECK-NEXT:    store <16 x i32> [[CASTFIXEDSVE]], ptr [[RETVAL]], align 8
+// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i32>, ptr [[RETVAL]], align 8
+// CHECK-NEXT:    [[CASTSCALABLESVE1:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v16i32(<vscale x 2 x i32> undef, <16 x i32> [[TMP3]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i32> [[CASTSCALABLESVE1]]
+//
+fixed_int32m2_t test_cast_m2(vint32m2_t vec) {
+  return __riscv_vadd(global_vec_m2, vec, __riscv_v_fixed_vlen/16);
+}

diff  --git a/clang/test/CodeGen/attr-riscv-rvv-vector-bits-types.c b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-types.c
index 774e0fc283823..45ac5607753f6 100644
--- a/clang/test/CodeGen/attr-riscv-rvv-vector-bits-types.c
+++ b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-types.c
@@ -19,6 +19,39 @@ typedef __rvv_uint64m1_t vuint64m1_t;
 typedef __rvv_float32m1_t vfloat32m1_t;
 typedef __rvv_float64m1_t vfloat64m1_t;
 
+typedef __rvv_int8m2_t vint8m2_t;
+typedef __rvv_uint8m2_t vuint8m2_t;
+typedef __rvv_int16m2_t vint16m2_t;
+typedef __rvv_uint16m2_t vuint16m2_t;
+typedef __rvv_int32m2_t vint32m2_t;
+typedef __rvv_uint32m2_t vuint32m2_t;
+typedef __rvv_int64m2_t vint64m2_t;
+typedef __rvv_uint64m2_t vuint64m2_t;
+typedef __rvv_float32m2_t vfloat32m2_t;
+typedef __rvv_float64m2_t vfloat64m2_t;
+
+typedef __rvv_int8m4_t vint8m4_t;
+typedef __rvv_uint8m4_t vuint8m4_t;
+typedef __rvv_int16m4_t vint16m4_t;
+typedef __rvv_uint16m4_t vuint16m4_t;
+typedef __rvv_int32m4_t vint32m4_t;
+typedef __rvv_uint32m4_t vuint32m4_t;
+typedef __rvv_int64m4_t vint64m4_t;
+typedef __rvv_uint64m4_t vuint64m4_t;
+typedef __rvv_float32m4_t vfloat32m4_t;
+typedef __rvv_float64m4_t vfloat64m4_t;
+
+typedef __rvv_int8m8_t vint8m8_t;
+typedef __rvv_uint8m8_t vuint8m8_t;
+typedef __rvv_int16m8_t vint16m8_t;
+typedef __rvv_uint16m8_t vuint16m8_t;
+typedef __rvv_int32m8_t vint32m8_t;
+typedef __rvv_uint32m8_t vuint32m8_t;
+typedef __rvv_int64m8_t vint64m8_t;
+typedef __rvv_uint64m8_t vuint64m8_t;
+typedef __rvv_float32m8_t vfloat32m8_t;
+typedef __rvv_float64m8_t vfloat64m8_t;
+
 // Define valid fixed-width RVV types
 typedef vint8m1_t fixed_int8m1_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen)));
 typedef vint16m1_t fixed_int16m1_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen)));
@@ -33,6 +66,45 @@ typedef vuint64m1_t fixed_uint64m1_t __attribute__((riscv_rvv_vector_bits(__risc
 typedef vfloat32m1_t fixed_float32m1_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen)));
 typedef vfloat64m1_t fixed_float64m1_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen)));
 
+typedef vint8m2_t fixed_int8m2_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 2)));
+typedef vint16m2_t fixed_int16m2_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 2)));
+typedef vint32m2_t fixed_int32m2_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 2)));
+typedef vint64m2_t fixed_int64m2_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 2)));
+
+typedef vuint8m2_t fixed_uint8m2_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 2)));
+typedef vuint16m2_t fixed_uint16m2_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 2)));
+typedef vuint32m2_t fixed_uint32m2_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 2)));
+typedef vuint64m2_t fixed_uint64m2_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 2)));
+
+typedef vfloat32m2_t fixed_float32m2_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 2)));
+typedef vfloat64m2_t fixed_float64m2_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 2)));
+
+typedef vint8m4_t fixed_int8m4_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 4)));
+typedef vint16m4_t fixed_int16m4_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 4)));
+typedef vint32m4_t fixed_int32m4_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 4)));
+typedef vint64m4_t fixed_int64m4_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 4)));
+
+typedef vuint8m4_t fixed_uint8m4_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 4)));
+typedef vuint16m4_t fixed_uint16m4_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 4)));
+typedef vuint32m4_t fixed_uint32m4_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 4)));
+typedef vuint64m4_t fixed_uint64m4_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 4)));
+
+typedef vfloat32m4_t fixed_float32m4_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 4)));
+typedef vfloat64m4_t fixed_float64m4_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 4)));
+
+typedef vint8m8_t fixed_int8m8_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 8)));
+typedef vint16m8_t fixed_int16m8_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 8)));
+typedef vint32m8_t fixed_int32m8_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 8)));
+typedef vint64m8_t fixed_int64m8_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 8)));
+
+typedef vuint8m8_t fixed_uint8m8_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 8)));
+typedef vuint16m8_t fixed_uint16m8_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 8)));
+typedef vuint32m8_t fixed_uint32m8_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 8)));
+typedef vuint64m8_t fixed_uint64m8_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 8)));
+
+typedef vfloat32m8_t fixed_float32m8_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 8)));
+typedef vfloat64m8_t fixed_float64m8_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 8)));
+
 //===----------------------------------------------------------------------===//
 // Structs and unions
 //===----------------------------------------------------------------------===//
@@ -57,6 +129,39 @@ DEFINE_STRUCT(uint64m1)
 DEFINE_STRUCT(float32m1)
 DEFINE_STRUCT(float64m1)
 
+DEFINE_STRUCT(int8m2)
+DEFINE_STRUCT(int16m2)
+DEFINE_STRUCT(int32m2)
+DEFINE_STRUCT(int64m2)
+DEFINE_STRUCT(uint8m2)
+DEFINE_STRUCT(uint16m2)
+DEFINE_STRUCT(uint32m2)
+DEFINE_STRUCT(uint64m2)
+DEFINE_STRUCT(float32m2)
+DEFINE_STRUCT(float64m2)
+
+DEFINE_STRUCT(int8m4)
+DEFINE_STRUCT(int16m4)
+DEFINE_STRUCT(int32m4)
+DEFINE_STRUCT(int64m4)
+DEFINE_STRUCT(uint8m4)
+DEFINE_STRUCT(uint16m4)
+DEFINE_STRUCT(uint32m4)
+DEFINE_STRUCT(uint64m4)
+DEFINE_STRUCT(float32m4)
+DEFINE_STRUCT(float64m4)
+
+DEFINE_STRUCT(int8m8)
+DEFINE_STRUCT(int16m8)
+DEFINE_STRUCT(int32m8)
+DEFINE_STRUCT(int64m8)
+DEFINE_STRUCT(uint8m8)
+DEFINE_STRUCT(uint16m8)
+DEFINE_STRUCT(uint32m8)
+DEFINE_STRUCT(uint64m8)
+DEFINE_STRUCT(float32m8)
+DEFINE_STRUCT(float64m8)
+
 DEFINE_UNION(int8m1)
 DEFINE_UNION(int16m1)
 DEFINE_UNION(int32m1)
@@ -68,6 +173,39 @@ DEFINE_UNION(uint64m1)
 DEFINE_UNION(float32m1)
 DEFINE_UNION(float64m1)
 
+DEFINE_UNION(int8m2)
+DEFINE_UNION(int16m2)
+DEFINE_UNION(int32m2)
+DEFINE_UNION(int64m2)
+DEFINE_UNION(uint8m2)
+DEFINE_UNION(uint16m2)
+DEFINE_UNION(uint32m2)
+DEFINE_UNION(uint64m2)
+DEFINE_UNION(float32m2)
+DEFINE_UNION(float64m2)
+
+DEFINE_UNION(int8m4)
+DEFINE_UNION(int16m4)
+DEFINE_UNION(int32m4)
+DEFINE_UNION(int64m4)
+DEFINE_UNION(uint8m4)
+DEFINE_UNION(uint16m4)
+DEFINE_UNION(uint32m4)
+DEFINE_UNION(uint64m4)
+DEFINE_UNION(float32m4)
+DEFINE_UNION(float64m4)
+
+DEFINE_UNION(int8m8)
+DEFINE_UNION(int16m8)
+DEFINE_UNION(int32m8)
+DEFINE_UNION(int64m8)
+DEFINE_UNION(uint8m8)
+DEFINE_UNION(uint16m8)
+DEFINE_UNION(uint32m8)
+DEFINE_UNION(uint64m8)
+DEFINE_UNION(float32m8)
+DEFINE_UNION(float64m8)
+
 //===----------------------------------------------------------------------===//
 // Global variables
 //===----------------------------------------------------------------------===//
@@ -84,6 +222,45 @@ fixed_uint64m1_t global_u64;
 fixed_float32m1_t global_f32;
 fixed_float64m1_t global_f64;
 
+fixed_int8m2_t global_i8m2;
+fixed_int16m2_t global_i16m2;
+fixed_int32m2_t global_i32m2;
+fixed_int64m2_t global_i64m2;
+
+fixed_uint8m2_t global_u8m2;
+fixed_uint16m2_t global_u16m2;
+fixed_uint32m2_t global_u32m2;
+fixed_uint64m2_t global_u64m2;
+
+fixed_float32m2_t global_f32m2;
+fixed_float64m2_t global_f64m2;
+
+fixed_int8m4_t global_i8m4;
+fixed_int16m4_t global_i16m4;
+fixed_int32m4_t global_i32m4;
+fixed_int64m4_t global_i64m4;
+
+fixed_uint8m4_t global_u8m4;
+fixed_uint16m4_t global_u16m4;
+fixed_uint32m4_t global_u32m4;
+fixed_uint64m4_t global_u64m4;
+
+fixed_float32m4_t global_f32m4;
+fixed_float64m4_t global_f64m4;
+
+fixed_int8m8_t global_i8m8;
+fixed_int16m8_t global_i16m8;
+fixed_int32m8_t global_i32m8;
+fixed_int64m8_t global_i64m8;
+
+fixed_uint8m8_t global_u8m8;
+fixed_uint16m8_t global_u16m8;
+fixed_uint32m8_t global_u32m8;
+fixed_uint64m8_t global_u64m8;
+
+fixed_float32m8_t global_f32m8;
+fixed_float64m8_t global_f64m8;
+
 //===----------------------------------------------------------------------===//
 // Global arrays
 //===----------------------------------------------------------------------===//
@@ -100,6 +277,45 @@ fixed_uint64m1_t global_arr_u64[3];
 fixed_float32m1_t global_arr_f32[3];
 fixed_float64m1_t global_arr_f64[3];
 
+fixed_int8m2_t global_arr_i8m2[3];
+fixed_int16m2_t global_arr_i16m2[3];
+fixed_int32m2_t global_arr_i32m2[3];
+fixed_int64m2_t global_arr_i64m2[3];
+
+fixed_uint8m2_t global_arr_u8m2[3];
+fixed_uint16m2_t global_arr_u16m2[3];
+fixed_uint32m2_t global_arr_u32m2[3];
+fixed_uint64m2_t global_arr_u64m2[3];
+
+fixed_float32m2_t global_arr_f32m2[3];
+fixed_float64m2_t global_arr_f64m2[3];
+
+fixed_int8m4_t global_arr_i8m4[3];
+fixed_int16m4_t global_arr_i16m4[3];
+fixed_int32m4_t global_arr_i32m4[3];
+fixed_int64m4_t global_arr_i64m4[3];
+
+fixed_uint8m4_t global_arr_u8m4[3];
+fixed_uint16m4_t global_arr_u16m4[3];
+fixed_uint32m4_t global_arr_u32m4[3];
+fixed_uint64m4_t global_arr_u64m4[3];
+
+fixed_float32m4_t global_arr_f32m4[3];
+fixed_float64m4_t global_arr_f64m4[3];
+
+fixed_int8m8_t global_arr_i8m8[3];
+fixed_int16m8_t global_arr_i16m8[3];
+fixed_int32m8_t global_arr_i32m8[3];
+fixed_int64m8_t global_arr_i64m8[3];
+
+fixed_uint8m8_t global_arr_u8m8[3];
+fixed_uint16m8_t global_arr_u16m8[3];
+fixed_uint32m8_t global_arr_u32m8[3];
+fixed_uint64m8_t global_arr_u64m8[3];
+
+fixed_float32m8_t global_arr_f32m8[3];
+fixed_float64m8_t global_arr_f64m8[3];
+
 //===----------------------------------------------------------------------===//
 // Locals
 //===----------------------------------------------------------------------===//
@@ -116,6 +332,39 @@ void f() {
   fixed_float32m1_t local_f32;
   fixed_float64m1_t local_f64;
 
+  fixed_int8m2_t local_i8m2;
+  fixed_int16m2_t local_i16m2;
+  fixed_int32m2_t local_i32m2;
+  fixed_int64m2_t local_i64m2;
+  fixed_uint8m2_t local_u8m2;
+  fixed_uint16m2_t local_u16m2;
+  fixed_uint32m2_t local_u32m2;
+  fixed_uint64m2_t local_u64m2;
+  fixed_float32m2_t local_f32m2;
+  fixed_float64m2_t local_f64m2;
+
+  fixed_int8m4_t local_i8m4;
+  fixed_int16m4_t local_i16m4;
+  fixed_int32m4_t local_i32m4;
+  fixed_int64m4_t local_i64m4;
+  fixed_uint8m4_t local_u8m4;
+  fixed_uint16m4_t local_u16m4;
+  fixed_uint32m4_t local_u32m4;
+  fixed_uint64m4_t local_u64m4;
+  fixed_float32m4_t local_f32m4;
+  fixed_float64m4_t local_f64m4;
+
+  fixed_int8m8_t local_i8m8;
+  fixed_int16m8_t local_i16m8;
+  fixed_int32m8_t local_i32m8;
+  fixed_int64m8_t local_i64m8;
+  fixed_uint8m8_t local_u8m8;
+  fixed_uint16m8_t local_u16m8;
+  fixed_uint32m8_t local_u32m8;
+  fixed_uint64m8_t local_u64m8;
+  fixed_float32m8_t local_f32m8;
+  fixed_float64m8_t local_f64m8;
+
   // Arrays
   fixed_int8m1_t local_arr_i8[3];
   fixed_int16m1_t local_arr_i16[3];
@@ -127,6 +376,39 @@ void f() {
   fixed_uint64m1_t local_arr_u64[3];
   fixed_float32m1_t local_arr_f32[3];
   fixed_float64m1_t local_arr_f64[3];
+
+  fixed_int8m2_t local_arr_i8m2[3];
+  fixed_int16m2_t local_arr_i16m2[3];
+  fixed_int32m2_t local_arr_i32m2[3];
+  fixed_int64m2_t local_arr_i64m2[3];
+  fixed_uint8m2_t local_arr_u8m2[3];
+  fixed_uint16m2_t local_arr_u16m2[3];
+  fixed_uint32m2_t local_arr_u32m2[3];
+  fixed_uint64m2_t local_arr_u64m2[3];
+  fixed_float32m2_t local_arr_f32m2[3];
+  fixed_float64m2_t local_arr_f64m2[3];
+
+  fixed_int8m4_t local_arr_i8m4[3];
+  fixed_int16m4_t local_arr_i16m4[3];
+  fixed_int32m4_t local_arr_i32m4[3];
+  fixed_int64m4_t local_arr_i64m4[3];
+  fixed_uint8m4_t local_arr_u8m4[3];
+  fixed_uint16m4_t local_arr_u16m4[3];
+  fixed_uint32m4_t local_arr_u32m4[3];
+  fixed_uint64m4_t local_arr_u64m4[3];
+  fixed_float32m4_t local_arr_f32m4[3];
+  fixed_float64m4_t local_arr_f64m4[3];
+
+  fixed_int8m8_t local_arr_i8m8[3];
+  fixed_int16m8_t local_arr_i16m8[3];
+  fixed_int32m8_t local_arr_i32m8[3];
+  fixed_int64m8_t local_arr_i64m8[3];
+  fixed_uint8m8_t local_arr_u8m8[3];
+  fixed_uint16m8_t local_arr_u16m8[3];
+  fixed_uint32m8_t local_arr_u32m8[3];
+  fixed_uint64m8_t local_arr_u64m8[3];
+  fixed_float32m8_t local_arr_f32m8[3];
+  fixed_float64m8_t local_arr_f64m8[3];
 }
 
 //===----------------------------------------------------------------------===//
@@ -142,6 +424,36 @@ void f() {
 // CHECK-64-NEXT: %struct.struct_uint64m1 = type { <1 x i64> }
 // CHECK-64-NEXT: %struct.struct_float32m1 = type { <2 x float> }
 // CHECK-64-NEXT: %struct.struct_float64m1 = type { <1 x double> }
+// CHECK-64-NEXT: %struct.struct_int8m2 = type { <16 x i8> }
+// CHECK-64-NEXT: %struct.struct_int16m2 = type { <8 x i16> }
+// CHECK-64-NEXT: %struct.struct_int32m2 = type { <4 x i32> }
+// CHECK-64-NEXT: %struct.struct_int64m2 = type { <2 x i64> }
+// CHECK-64-NEXT: %struct.struct_uint8m2 = type { <16 x i8> }
+// CHECK-64-NEXT: %struct.struct_uint16m2 = type { <8 x i16> }
+// CHECK-64-NEXT: %struct.struct_uint32m2 = type { <4 x i32> }
+// CHECK-64-NEXT: %struct.struct_uint64m2 = type { <2 x i64> }
+// CHECK-64-NEXT: %struct.struct_float32m2 = type { <4 x float> }
+// CHECK-64-NEXT: %struct.struct_float64m2 = type { <2 x double> }
+// CHECK-64-NEXT: %struct.struct_int8m4 = type { <32 x i8> }
+// CHECK-64-NEXT: %struct.struct_int16m4 = type { <16 x i16> }
+// CHECK-64-NEXT: %struct.struct_int32m4 = type { <8 x i32> }
+// CHECK-64-NEXT: %struct.struct_int64m4 = type { <4 x i64> }
+// CHECK-64-NEXT: %struct.struct_uint8m4 = type { <32 x i8> }
+// CHECK-64-NEXT: %struct.struct_uint16m4 = type { <16 x i16> }
+// CHECK-64-NEXT: %struct.struct_uint32m4 = type { <8 x i32> }
+// CHECK-64-NEXT: %struct.struct_uint64m4 = type { <4 x i64> }
+// CHECK-64-NEXT: %struct.struct_float32m4 = type { <8 x float> }
+// CHECK-64-NEXT: %struct.struct_float64m4 = type { <4 x double> }
+// CHECK-64-NEXT: %struct.struct_int8m8 = type { <64 x i8> }
+// CHECK-64-NEXT: %struct.struct_int16m8 = type { <32 x i16> }
+// CHECK-64-NEXT: %struct.struct_int32m8 = type { <16 x i32> }
+// CHECK-64-NEXT: %struct.struct_int64m8 = type { <8 x i64> }
+// CHECK-64-NEXT: %struct.struct_uint8m8 = type { <64 x i8> }
+// CHECK-64-NEXT: %struct.struct_uint16m8 = type { <32 x i16> }
+// CHECK-64-NEXT: %struct.struct_uint32m8 = type { <16 x i32> }
+// CHECK-64-NEXT: %struct.struct_uint64m8 = type { <8 x i64> }
+// CHECK-64-NEXT: %struct.struct_float32m8 = type { <16 x float> }
+// CHECK-64-NEXT: %struct.struct_float64m8 = type { <8 x double> }
 
 // CHECK-128:      %struct.struct_int8m1 = type { <16 x i8> }
 // CHECK-128-NEXT: %struct.struct_int16m1 = type { <8 x i16> }
@@ -153,6 +465,36 @@ void f() {
 // CHECK-128-NEXT: %struct.struct_uint64m1 = type { <2 x i64> }
 // CHECK-128-NEXT: %struct.struct_float32m1 = type { <4 x float> }
 // CHECK-128-NEXT: %struct.struct_float64m1 = type { <2 x double> }
+// CHECK-128-NEXT: %struct.struct_int8m2 = type { <32 x i8> }
+// CHECK-128-NEXT: %struct.struct_int16m2 = type { <16 x i16> }
+// CHECK-128-NEXT: %struct.struct_int32m2 = type { <8 x i32> }
+// CHECK-128-NEXT: %struct.struct_int64m2 = type { <4 x i64> }
+// CHECK-128-NEXT: %struct.struct_uint8m2 = type { <32 x i8> }
+// CHECK-128-NEXT: %struct.struct_uint16m2 = type { <16 x i16> }
+// CHECK-128-NEXT: %struct.struct_uint32m2 = type { <8 x i32> }
+// CHECK-128-NEXT: %struct.struct_uint64m2 = type { <4 x i64> }
+// CHECK-128-NEXT: %struct.struct_float32m2 = type { <8 x float> }
+// CHECK-128-NEXT: %struct.struct_float64m2 = type { <4 x double> }
+// CHECK-128-NEXT: %struct.struct_int8m4 = type { <64 x i8> }
+// CHECK-128-NEXT: %struct.struct_int16m4 = type { <32 x i16> }
+// CHECK-128-NEXT: %struct.struct_int32m4 = type { <16 x i32> }
+// CHECK-128-NEXT: %struct.struct_int64m4 = type { <8 x i64> }
+// CHECK-128-NEXT: %struct.struct_uint8m4 = type { <64 x i8> }
+// CHECK-128-NEXT: %struct.struct_uint16m4 = type { <32 x i16> }
+// CHECK-128-NEXT: %struct.struct_uint32m4 = type { <16 x i32> }
+// CHECK-128-NEXT: %struct.struct_uint64m4 = type { <8 x i64> }
+// CHECK-128-NEXT: %struct.struct_float32m4 = type { <16 x float> }
+// CHECK-128-NEXT: %struct.struct_float64m4 = type { <8 x double> }
+// CHECK-128-NEXT: %struct.struct_int8m8 = type { <128 x i8> }
+// CHECK-128-NEXT: %struct.struct_int16m8 = type { <64 x i16> }
+// CHECK-128-NEXT: %struct.struct_int32m8 = type { <32 x i32> }
+// CHECK-128-NEXT: %struct.struct_int64m8 = type { <16 x i64> }
+// CHECK-128-NEXT: %struct.struct_uint8m8 = type { <128 x i8> }
+// CHECK-128-NEXT: %struct.struct_uint16m8 = type { <64 x i16> }
+// CHECK-128-NEXT: %struct.struct_uint32m8 = type { <32 x i32> }
+// CHECK-128-NEXT: %struct.struct_uint64m8 = type { <16 x i64> }
+// CHECK-128-NEXT: %struct.struct_float32m8 = type { <32 x float> }
+// CHECK-128-NEXT: %struct.struct_float64m8 = type { <16 x double> }
 
 // CHECK-256:      %struct.struct_int8m1 = type { <32 x i8> }
 // CHECK-256-NEXT: %struct.struct_int16m1 = type { <16 x i16> }
@@ -164,6 +506,35 @@ void f() {
 // CHECK-256-NEXT: %struct.struct_uint64m1 = type { <4 x i64> }
 // CHECK-256-NEXT: %struct.struct_float32m1 = type { <8 x float> }
 // CHECK-256-NEXT: %struct.struct_float64m1 = type { <4 x double> }
+// CHECK-256-NEXT: %struct.struct_int8m2 = type { <64 x i8> }
+// CHECK-256-NEXT: %struct.struct_int16m2 = type { <32 x i16> }
+// CHECK-256-NEXT: %struct.struct_int32m2 = type { <16 x i32> }
+// CHECK-256-NEXT: %struct.struct_int64m2 = type { <8 x i64> }
+// CHECK-256-NEXT: %struct.struct_uint8m2 = type { <64 x i8> }
+// CHECK-256-NEXT: %struct.struct_uint16m2 = type { <32 x i16> }
+// CHECK-256-NEXT: %struct.struct_uint32m2 = type { <16 x i32> }
+// CHECK-256-NEXT: %struct.struct_uint64m2 = type { <8 x i64> }
+// CHECK-256-NEXT: %struct.struct_float32m2 = type { <16 x float> }
+// CHECK-256-NEXT: %struct.struct_float64m2 = type { <8 x double> }
+// CHECK-256-NEXT: %struct.struct_int8m4 = type { <128 x i8> }
+// CHECK-256-NEXT: %struct.struct_int16m4 = type { <64 x i16> }
+// CHECK-256-NEXT: %struct.struct_int32m4 = type { <32 x i32> }
+// CHECK-256-NEXT: %struct.struct_int64m4 = type { <16 x i64> }
+// CHECK-256-NEXT: %struct.struct_uint8m4 = type { <128 x i8> }
+// CHECK-256-NEXT: %struct.struct_uint16m4 = type { <64 x i16> }
+// CHECK-256-NEXT: %struct.struct_uint32m4 = type { <32 x i32> }
+// CHECK-256-NEXT: %struct.struct_uint64m4 = type { <16 x i64> }
+// CHECK-256-NEXT: %struct.struct_float32m4 = type { <32 x float> }
+// CHECK-256-NEXT: %struct.struct_float64m4 = type { <16 x double> }
+// CHECK-256-NEXT: %struct.struct_int8m8 = type { <256 x i8> }
+// CHECK-256-NEXT: %struct.struct_int16m8 = type { <128 x i16> }
+// CHECK-256-NEXT: %struct.struct_int32m8 = type { <64 x i32> }
+// CHECK-256-NEXT: %struct.struct_int64m8 = type { <32 x i64> }
+// CHECK-256-NEXT: %struct.struct_uint8m8 = type { <256 x i8> }
+// CHECK-256-NEXT: %struct.struct_uint16m8 = type { <128 x i16> }
+// CHECK-256-NEXT: %struct.struct_uint32m8 = type { <64 x i32> }
+// CHECK-256-NEXT: %struct.struct_uint64m8 = type { <32 x i64> }
+// CHECK-256-NEXT: %struct.struct_float32m8 = type { <64 x float> }
 
 // CHECK-512:      %struct.struct_int8m1 = type { <64 x i8> }
 // CHECK-512-NEXT: %struct.struct_int16m1 = type { <32 x i16> }
@@ -175,6 +546,35 @@ void f() {
 // CHECK-512-NEXT: %struct.struct_uint64m1 = type { <8 x i64> }
 // CHECK-512-NEXT: %struct.struct_float32m1 = type { <16 x float> }
 // CHECK-512-NEXT: %struct.struct_float64m1 = type { <8 x double> }
+// CHECK-512-NEXT: %struct.struct_int8m2 = type { <128 x i8> }
+// CHECK-512-NEXT: %struct.struct_int16m2 = type { <64 x i16> }
+// CHECK-512-NEXT: %struct.struct_int32m2 = type { <32 x i32> }
+// CHECK-512-NEXT: %struct.struct_int64m2 = type { <16 x i64> }
+// CHECK-512-NEXT: %struct.struct_uint8m2 = type { <128 x i8> }
+// CHECK-512-NEXT: %struct.struct_uint16m2 = type { <64 x i16> }
+// CHECK-512-NEXT: %struct.struct_uint32m2 = type { <32 x i32> }
+// CHECK-512-NEXT: %struct.struct_uint64m2 = type { <16 x i64> }
+// CHECK-512-NEXT: %struct.struct_float32m2 = type { <32 x float> }
+// CHECK-512-NEXT: %struct.struct_float64m2 = type { <16 x double> }
+// CHECK-512-NEXT: %struct.struct_int8m4 = type { <256 x i8> }
+// CHECK-512-NEXT: %struct.struct_int16m4 = type { <128 x i16> }
+// CHECK-512-NEXT: %struct.struct_int32m4 = type { <64 x i32> }
+// CHECK-512-NEXT: %struct.struct_int64m4 = type { <32 x i64> }
+// CHECK-512-NEXT: %struct.struct_uint8m4 = type { <256 x i8> }
+// CHECK-512-NEXT: %struct.struct_uint16m4 = type { <128 x i16> }
+// CHECK-512-NEXT: %struct.struct_uint32m4 = type { <64 x i32> }
+// CHECK-512-NEXT: %struct.struct_uint64m4 = type { <32 x i64> }
+// CHECK-512-NEXT: %struct.struct_float32m4 = type { <64 x float> }
+// CHECK-512-NEXT: %struct.struct_float64m4 = type { <32 x double> }
+// CHECK-512-NEXT: %struct.struct_int8m8 = type { <512 x i8> }
+// CHECK-512-NEXT: %struct.struct_int16m8 = type { <256 x i16> }
+// CHECK-512-NEXT: %struct.struct_int32m8 = type { <128 x i32> }
+// CHECK-512-NEXT: %struct.struct_int64m8 = type { <64 x i64> }
+// CHECK-512-NEXT: %struct.struct_uint8m8 = type { <512 x i8> }
+// CHECK-512-NEXT: %struct.struct_uint16m8 = type { <256 x i16> }
+// CHECK-512-NEXT: %struct.struct_uint32m8 = type { <128 x i32> }
+// CHECK-512-NEXT: %struct.struct_uint64m8 = type { <64 x i64> }
+// CHECK-512-NEXT: %struct.struct_float32m8 = type { <128 x float> }
 
 // CHECK-1024:      %struct.struct_int8m1 = type { <128 x i8> }
 // CHECK-1024-NEXT: %struct.struct_int16m1 = type { <64 x i16> }
@@ -186,6 +586,35 @@ void f() {
 // CHECK-1024-NEXT: %struct.struct_uint64m1 = type { <16 x i64> }
 // CHECK-1024-NEXT: %struct.struct_float32m1 = type { <32 x float> }
 // CHECK-1024-NEXT: %struct.struct_float64m1 = type { <16 x double> }
+// CHECK-1024-NEXT: %struct.struct_int8m2 = type { <256 x i8> }
+// CHECK-1024-NEXT: %struct.struct_int16m2 = type { <128 x i16> }
+// CHECK-1024-NEXT: %struct.struct_int32m2 = type { <64 x i32> }
+// CHECK-1024-NEXT: %struct.struct_int64m2 = type { <32 x i64> }
+// CHECK-1024-NEXT: %struct.struct_uint8m2 = type { <256 x i8> }
+// CHECK-1024-NEXT: %struct.struct_uint16m2 = type { <128 x i16> }
+// CHECK-1024-NEXT: %struct.struct_uint32m2 = type { <64 x i32> }
+// CHECK-1024-NEXT: %struct.struct_uint64m2 = type { <32 x i64> }
+// CHECK-1024-NEXT: %struct.struct_float32m2 = type { <64 x float> }
+// CHECK-1024-NEXT: %struct.struct_float64m2 = type { <32 x double> }
+// CHECK-1024-NEXT: %struct.struct_int8m4 = type { <512 x i8> }
+// CHECK-1024-NEXT: %struct.struct_int16m4 = type { <256 x i16> }
+// CHECK-1024-NEXT: %struct.struct_int32m4 = type { <128 x i32> }
+// CHECK-1024-NEXT: %struct.struct_int64m4 = type { <64 x i64> }
+// CHECK-1024-NEXT: %struct.struct_uint8m4 = type { <512 x i8> }
+// CHECK-1024-NEXT: %struct.struct_uint16m4 = type { <256 x i16> }
+// CHECK-1024-NEXT: %struct.struct_uint32m4 = type { <128 x i32> }
+// CHECK-1024-NEXT: %struct.struct_uint64m4 = type { <64 x i64> }
+// CHECK-1024-NEXT: %struct.struct_float32m4 = type { <128 x float> }
+// CHECK-1024-NEXT: %struct.struct_float64m4 = type { <64 x double> }
+// CHECK-1024-NEXT: %struct.struct_int8m8 = type { <1024 x i8> }
+// CHECK-1024-NEXT: %struct.struct_int16m8 = type { <512 x i16> }
+// CHECK-1024-NEXT: %struct.struct_int32m8 = type { <256 x i32> }
+// CHECK-1024-NEXT: %struct.struct_int64m8 = type { <128 x i64> }
+// CHECK-1024-NEXT: %struct.struct_uint8m8 = type { <1024 x i8> }
+// CHECK-1024-NEXT: %struct.struct_uint16m8 = type { <512 x i16> }
+// CHECK-1024-NEXT: %struct.struct_uint32m8 = type { <256 x i32> }
+// CHECK-1024-NEXT: %struct.struct_uint64m8 = type { <128 x i64> }
+// CHECK-1024-NEXT: %struct.struct_float32m8 = type { <256 x float> }
 
 // CHECK-64:      %union.union_int8m1 = type { <8 x i8> }
 // CHECK-64-NEXT: %union.union_int16m1 = type { <4 x i16> }
@@ -197,6 +626,36 @@ void f() {
 // CHECK-64-NEXT: %union.union_uint64m1 = type { <1 x i64> }
 // CHECK-64-NEXT: %union.union_float32m1 = type { <2 x float> }
 // CHECK-64-NEXT: %union.union_float64m1 = type { <1 x double> }
+// CHECK-64-NEXT: %union.union_int8m2 = type { <16 x i8> }
+// CHECK-64-NEXT: %union.union_int16m2 = type { <8 x i16> }
+// CHECK-64-NEXT: %union.union_int32m2 = type { <4 x i32> }
+// CHECK-64-NEXT: %union.union_int64m2 = type { <2 x i64> }
+// CHECK-64-NEXT: %union.union_uint8m2 = type { <16 x i8> }
+// CHECK-64-NEXT: %union.union_uint16m2 = type { <8 x i16> }
+// CHECK-64-NEXT: %union.union_uint32m2 = type { <4 x i32> }
+// CHECK-64-NEXT: %union.union_uint64m2 = type { <2 x i64> }
+// CHECK-64-NEXT: %union.union_float32m2 = type { <4 x float> }
+// CHECK-64-NEXT: %union.union_float64m2 = type { <2 x double> }
+// CHECK-64-NEXT: %union.union_int8m4 = type { <32 x i8> }
+// CHECK-64-NEXT: %union.union_int16m4 = type { <16 x i16> }
+// CHECK-64-NEXT: %union.union_int32m4 = type { <8 x i32> }
+// CHECK-64-NEXT: %union.union_int64m4 = type { <4 x i64> }
+// CHECK-64-NEXT: %union.union_uint8m4 = type { <32 x i8> }
+// CHECK-64-NEXT: %union.union_uint16m4 = type { <16 x i16> }
+// CHECK-64-NEXT: %union.union_uint32m4 = type { <8 x i32> }
+// CHECK-64-NEXT: %union.union_uint64m4 = type { <4 x i64> }
+// CHECK-64-NEXT: %union.union_float32m4 = type { <8 x float> }
+// CHECK-64-NEXT: %union.union_float64m4 = type { <4 x double> }
+// CHECK-64-NEXT: %union.union_int8m8 = type { <64 x i8> }
+// CHECK-64-NEXT: %union.union_int16m8 = type { <32 x i16> }
+// CHECK-64-NEXT: %union.union_int32m8 = type { <16 x i32> }
+// CHECK-64-NEXT: %union.union_int64m8 = type { <8 x i64> }
+// CHECK-64-NEXT: %union.union_uint8m8 = type { <64 x i8> }
+// CHECK-64-NEXT: %union.union_uint16m8 = type { <32 x i16> }
+// CHECK-64-NEXT: %union.union_uint32m8 = type { <16 x i32> }
+// CHECK-64-NEXT: %union.union_uint64m8 = type { <8 x i64> }
+// CHECK-64-NEXT: %union.union_float32m8 = type { <16 x float> }
+// CHECK-64-NEXT: %union.union_float64m8 = type { <8 x double> }
 
 // CHECK-128:      %union.union_int8m1 = type { <16 x i8> }
 // CHECK-128-NEXT: %union.union_int16m1 = type { <8 x i16> }
@@ -208,6 +667,36 @@ void f() {
 // CHECK-128-NEXT: %union.union_uint64m1 = type { <2 x i64> }
 // CHECK-128-NEXT: %union.union_float32m1 = type { <4 x float> }
 // CHECK-128-NEXT: %union.union_float64m1 = type { <2 x double> }
+// CHECK-128-NEXT: %union.union_int8m2 = type { <32 x i8> }
+// CHECK-128-NEXT: %union.union_int16m2 = type { <16 x i16> }
+// CHECK-128-NEXT: %union.union_int32m2 = type { <8 x i32> }
+// CHECK-128-NEXT: %union.union_int64m2 = type { <4 x i64> }
+// CHECK-128-NEXT: %union.union_uint8m2 = type { <32 x i8> }
+// CHECK-128-NEXT: %union.union_uint16m2 = type { <16 x i16> }
+// CHECK-128-NEXT: %union.union_uint32m2 = type { <8 x i32> }
+// CHECK-128-NEXT: %union.union_uint64m2 = type { <4 x i64> }
+// CHECK-128-NEXT: %union.union_float32m2 = type { <8 x float> }
+// CHECK-128-NEXT: %union.union_float64m2 = type { <4 x double> }
+// CHECK-128-NEXT: %union.union_int8m4 = type { <64 x i8> }
+// CHECK-128-NEXT: %union.union_int16m4 = type { <32 x i16> }
+// CHECK-128-NEXT: %union.union_int32m4 = type { <16 x i32> }
+// CHECK-128-NEXT: %union.union_int64m4 = type { <8 x i64> }
+// CHECK-128-NEXT: %union.union_uint8m4 = type { <64 x i8> }
+// CHECK-128-NEXT: %union.union_uint16m4 = type { <32 x i16> }
+// CHECK-128-NEXT: %union.union_uint32m4 = type { <16 x i32> }
+// CHECK-128-NEXT: %union.union_uint64m4 = type { <8 x i64> }
+// CHECK-128-NEXT: %union.union_float32m4 = type { <16 x float> }
+// CHECK-128-NEXT: %union.union_float64m4 = type { <8 x double> }
+// CHECK-128-NEXT: %union.union_int8m8 = type { <128 x i8> }
+// CHECK-128-NEXT: %union.union_int16m8 = type { <64 x i16> }
+// CHECK-128-NEXT: %union.union_int32m8 = type { <32 x i32> }
+// CHECK-128-NEXT: %union.union_int64m8 = type { <16 x i64> }
+// CHECK-128-NEXT: %union.union_uint8m8 = type { <128 x i8> }
+// CHECK-128-NEXT: %union.union_uint16m8 = type { <64 x i16> }
+// CHECK-128-NEXT: %union.union_uint32m8 = type { <32 x i32> }
+// CHECK-128-NEXT: %union.union_uint64m8 = type { <16 x i64> }
+// CHECK-128-NEXT: %union.union_float32m8 = type { <32 x float> }
+// CHECK-128-NEXT: %union.union_float64m8 = type { <16 x double> }
 
 // CHECK-256:      %union.union_int8m1 = type { <32 x i8> }
 // CHECK-256-NEXT: %union.union_int16m1 = type { <16 x i16> }
@@ -219,6 +708,36 @@ void f() {
 // CHECK-256-NEXT: %union.union_uint64m1 = type { <4 x i64> }
 // CHECK-256-NEXT: %union.union_float32m1 = type { <8 x float> }
 // CHECK-256-NEXT: %union.union_float64m1 = type { <4 x double> }
+// CHECK-256-NEXT: %union.union_int8m2 = type { <64 x i8> }
+// CHECK-256-NEXT: %union.union_int16m2 = type { <32 x i16> }
+// CHECK-256-NEXT: %union.union_int32m2 = type { <16 x i32> }
+// CHECK-256-NEXT: %union.union_int64m2 = type { <8 x i64> }
+// CHECK-256-NEXT: %union.union_uint8m2 = type { <64 x i8> }
+// CHECK-256-NEXT: %union.union_uint16m2 = type { <32 x i16> }
+// CHECK-256-NEXT: %union.union_uint32m2 = type { <16 x i32> }
+// CHECK-256-NEXT: %union.union_uint64m2 = type { <8 x i64> }
+// CHECK-256-NEXT: %union.union_float32m2 = type { <16 x float> }
+// CHECK-256-NEXT: %union.union_float64m2 = type { <8 x double> }
+// CHECK-256-NEXT: %union.union_int8m4 = type { <128 x i8> }
+// CHECK-256-NEXT: %union.union_int16m4 = type { <64 x i16> }
+// CHECK-256-NEXT: %union.union_int32m4 = type { <32 x i32> }
+// CHECK-256-NEXT: %union.union_int64m4 = type { <16 x i64> }
+// CHECK-256-NEXT: %union.union_uint8m4 = type { <128 x i8> }
+// CHECK-256-NEXT: %union.union_uint16m4 = type { <64 x i16> }
+// CHECK-256-NEXT: %union.union_uint32m4 = type { <32 x i32> }
+// CHECK-256-NEXT: %union.union_uint64m4 = type { <16 x i64> }
+// CHECK-256-NEXT: %union.union_float32m4 = type { <32 x float> }
+// CHECK-256-NEXT: %union.union_float64m4 = type { <16 x double> }
+// CHECK-256-NEXT: %union.union_int8m8 = type { <256 x i8> }
+// CHECK-256-NEXT: %union.union_int16m8 = type { <128 x i16> }
+// CHECK-256-NEXT: %union.union_int32m8 = type { <64 x i32> }
+// CHECK-256-NEXT: %union.union_int64m8 = type { <32 x i64> }
+// CHECK-256-NEXT: %union.union_uint8m8 = type { <256 x i8> }
+// CHECK-256-NEXT: %union.union_uint16m8 = type { <128 x i16> }
+// CHECK-256-NEXT: %union.union_uint32m8 = type { <64 x i32> }
+// CHECK-256-NEXT: %union.union_uint64m8 = type { <32 x i64> }
+// CHECK-256-NEXT: %union.union_float32m8 = type { <64 x float> }
+// CHECK-256-NEXT: %union.union_float64m8 = type { <32 x double> }
 
 // CHECK-512:      %union.union_int8m1 = type { <64 x i8> }
 // CHECK-512-NEXT: %union.union_int16m1 = type { <32 x i16> }
@@ -230,6 +749,36 @@ void f() {
 // CHECK-512-NEXT: %union.union_uint64m1 = type { <8 x i64> }
 // CHECK-512-NEXT: %union.union_float32m1 = type { <16 x float> }
 // CHECK-512-NEXT: %union.union_float64m1 = type { <8 x double> }
+// CHECK-512-NEXT: %union.union_int8m2 = type { <128 x i8> }
+// CHECK-512-NEXT: %union.union_int16m2 = type { <64 x i16> }
+// CHECK-512-NEXT: %union.union_int32m2 = type { <32 x i32> }
+// CHECK-512-NEXT: %union.union_int64m2 = type { <16 x i64> }
+// CHECK-512-NEXT: %union.union_uint8m2 = type { <128 x i8> }
+// CHECK-512-NEXT: %union.union_uint16m2 = type { <64 x i16> }
+// CHECK-512-NEXT: %union.union_uint32m2 = type { <32 x i32> }
+// CHECK-512-NEXT: %union.union_uint64m2 = type { <16 x i64> }
+// CHECK-512-NEXT: %union.union_float32m2 = type { <32 x float> }
+// CHECK-512-NEXT: %union.union_float64m2 = type { <16 x double> }
+// CHECK-512-NEXT: %union.union_int8m4 = type { <256 x i8> }
+// CHECK-512-NEXT: %union.union_int16m4 = type { <128 x i16> }
+// CHECK-512-NEXT: %union.union_int32m4 = type { <64 x i32> }
+// CHECK-512-NEXT: %union.union_int64m4 = type { <32 x i64> }
+// CHECK-512-NEXT: %union.union_uint8m4 = type { <256 x i8> }
+// CHECK-512-NEXT: %union.union_uint16m4 = type { <128 x i16> }
+// CHECK-512-NEXT: %union.union_uint32m4 = type { <64 x i32> }
+// CHECK-512-NEXT: %union.union_uint64m4 = type { <32 x i64> }
+// CHECK-512-NEXT: %union.union_float32m4 = type { <64 x float> }
+// CHECK-512-NEXT: %union.union_float64m4 = type { <32 x double> }
+// CHECK-512-NEXT: %union.union_int8m8 = type { <512 x i8> }
+// CHECK-512-NEXT: %union.union_int16m8 = type { <256 x i16> }
+// CHECK-512-NEXT: %union.union_int32m8 = type { <128 x i32> }
+// CHECK-512-NEXT: %union.union_int64m8 = type { <64 x i64> }
+// CHECK-512-NEXT: %union.union_uint8m8 = type { <512 x i8> }
+// CHECK-512-NEXT: %union.union_uint16m8 = type { <256 x i16> }
+// CHECK-512-NEXT: %union.union_uint32m8 = type { <128 x i32> }
+// CHECK-512-NEXT: %union.union_uint64m8 = type { <64 x i64> }
+// CHECK-512-NEXT: %union.union_float32m8 = type { <128 x float> }
+// CHECK-512-NEXT: %union.union_float64m8 = type { <64 x double> }
 
 // CHECK-1024:      %union.union_int8m1 = type { <128 x i8> }
 // CHECK-1024-NEXT: %union.union_int16m1 = type { <64 x i16> }
@@ -241,6 +790,36 @@ void f() {
 // CHECK-1024-NEXT: %union.union_uint64m1 = type { <16 x i64> }
 // CHECK-1024-NEXT: %union.union_float32m1 = type { <32 x float> }
 // CHECK-1024-NEXT: %union.union_float64m1 = type { <16 x double> }
+// CHECK-1024-NEXT: %union.union_int8m2 = type { <256 x i8> }
+// CHECK-1024-NEXT: %union.union_int16m2 = type { <128 x i16> }
+// CHECK-1024-NEXT: %union.union_int32m2 = type { <64 x i32> }
+// CHECK-1024-NEXT: %union.union_int64m2 = type { <32 x i64> }
+// CHECK-1024-NEXT: %union.union_uint8m2 = type { <256 x i8> }
+// CHECK-1024-NEXT: %union.union_uint16m2 = type { <128 x i16> }
+// CHECK-1024-NEXT: %union.union_uint32m2 = type { <64 x i32> }
+// CHECK-1024-NEXT: %union.union_uint64m2 = type { <32 x i64> }
+// CHECK-1024-NEXT: %union.union_float32m2 = type { <64 x float> }
+// CHECK-1024-NEXT: %union.union_float64m2 = type { <32 x double> }
+// CHECK-1024-NEXT: %union.union_int8m4 = type { <512 x i8> }
+// CHECK-1024-NEXT: %union.union_int16m4 = type { <256 x i16> }
+// CHECK-1024-NEXT: %union.union_int32m4 = type { <128 x i32> }
+// CHECK-1024-NEXT: %union.union_int64m4 = type { <64 x i64> }
+// CHECK-1024-NEXT: %union.union_uint8m4 = type { <512 x i8> }
+// CHECK-1024-NEXT: %union.union_uint16m4 = type { <256 x i16> }
+// CHECK-1024-NEXT: %union.union_uint32m4 = type { <128 x i32> }
+// CHECK-1024-NEXT: %union.union_uint64m4 = type { <64 x i64> }
+// CHECK-1024-NEXT: %union.union_float32m4 = type { <128 x float> }
+// CHECK-1024-NEXT: %union.union_float64m4 = type { <64 x double> }
+// CHECK-1024-NEXT: %union.union_int8m8 = type { <1024 x i8> }
+// CHECK-1024-NEXT: %union.union_int16m8 = type { <512 x i16> }
+// CHECK-1024-NEXT: %union.union_int32m8 = type { <256 x i32> }
+// CHECK-1024-NEXT: %union.union_int64m8 = type { <128 x i64> }
+// CHECK-1024-NEXT: %union.union_uint8m8 = type { <1024 x i8> }
+// CHECK-1024-NEXT: %union.union_uint16m8 = type { <512 x i16> }
+// CHECK-1024-NEXT: %union.union_uint32m8 = type { <256 x i32> }
+// CHECK-1024-NEXT: %union.union_uint64m8 = type { <128 x i64> }
+// CHECK-1024-NEXT: %union.union_float32m8 = type { <256 x float> }
+// CHECK-1024-NEXT: %union.union_float64m8 = type { <128 x double> }
 
 //===----------------------------------------------------------------------===//
 // Global variables
@@ -255,6 +834,36 @@ void f() {
 // CHECK-64-NEXT: @global_u64 ={{.*}} global <1 x i64> zeroinitializer, align 8
 // CHECK-64-NEXT: @global_f32 ={{.*}} global <2 x float> zeroinitializer, align 8
 // CHECK-64-NEXT: @global_f64 ={{.*}} global <1 x double> zeroinitializer, align 8
+// CHECK-64-NEXT: @global_i8m2 ={{.*}} global <16 x i8> zeroinitializer, align 8
+// CHECK-64-NEXT: @global_i16m2 ={{.*}} global <8 x i16> zeroinitializer, align 8
+// CHECK-64-NEXT: @global_i32m2 ={{.*}} global <4 x i32> zeroinitializer, align 8
+// CHECK-64-NEXT: @global_i64m2 ={{.*}} global <2 x i64> zeroinitializer, align 8
+// CHECK-64-NEXT: @global_u8m2 ={{.*}} global <16 x i8> zeroinitializer, align 8
+// CHECK-64-NEXT: @global_u16m2 ={{.*}} global <8 x i16> zeroinitializer, align 8
+// CHECK-64-NEXT: @global_u32m2 ={{.*}} global <4 x i32> zeroinitializer, align 8
+// CHECK-64-NEXT: @global_u64m2 ={{.*}} global <2 x i64> zeroinitializer, align 8
+// CHECK-64-NEXT: @global_f32m2 ={{.*}} global <4 x float> zeroinitializer, align 8
+// CHECK-64-NEXT: @global_f64m2 ={{.*}} global <2 x double> zeroinitializer, align 8
+// CHECK-64-NEXT: @global_i8m4 ={{.*}} global <32 x i8> zeroinitializer, align 8
+// CHECK-64-NEXT: @global_i16m4 ={{.*}} global <16 x i16> zeroinitializer, align 8
+// CHECK-64-NEXT: @global_i32m4 ={{.*}} global <8 x i32> zeroinitializer, align 8
+// CHECK-64-NEXT: @global_i64m4 ={{.*}} global <4 x i64> zeroinitializer, align 8
+// CHECK-64-NEXT: @global_u8m4 ={{.*}} global <32 x i8> zeroinitializer, align 8
+// CHECK-64-NEXT: @global_u16m4 ={{.*}} global <16 x i16> zeroinitializer, align 8
+// CHECK-64-NEXT: @global_u32m4 ={{.*}} global <8 x i32> zeroinitializer, align 8
+// CHECK-64-NEXT: @global_u64m4 ={{.*}} global <4 x i64> zeroinitializer, align 8
+// CHECK-64-NEXT: @global_f32m4 ={{.*}} global <8 x float> zeroinitializer, align 8
+// CHECK-64-NEXT: @global_f64m4 ={{.*}} global <4 x double> zeroinitializer, align 8
+// CHECK-64-NEXT: @global_i8m8 ={{.*}} global <64 x i8> zeroinitializer, align 8
+// CHECK-64-NEXT: @global_i16m8 ={{.*}} global <32 x i16> zeroinitializer, align 8
+// CHECK-64-NEXT: @global_i32m8 ={{.*}} global <16 x i32> zeroinitializer, align 8
+// CHECK-64-NEXT: @global_i64m8 ={{.*}} global <8 x i64> zeroinitializer, align 8
+// CHECK-64-NEXT: @global_u8m8 ={{.*}} global <64 x i8> zeroinitializer, align 8
+// CHECK-64-NEXT: @global_u16m8 ={{.*}} global <32 x i16> zeroinitializer, align 8
+// CHECK-64-NEXT: @global_u32m8 ={{.*}} global <16 x i32> zeroinitializer, align 8
+// CHECK-64-NEXT: @global_u64m8 ={{.*}} global <8 x i64> zeroinitializer, align 8
+// CHECK-64-NEXT: @global_f32m8 ={{.*}} global <16 x float> zeroinitializer, align 8
+// CHECK-64-NEXT: @global_f64m8 ={{.*}} global <8 x double> zeroinitializer, align 8
 
 // CHECK-128:      @global_i8 ={{.*}} global <16 x i8> zeroinitializer, align 8
 // CHECK-128-NEXT: @global_i16 ={{.*}} global <8 x i16> zeroinitializer, align 8
@@ -266,6 +875,36 @@ void f() {
 // CHECK-128-NEXT: @global_u64 ={{.*}} global <2 x i64> zeroinitializer, align 8
 // CHECK-128-NEXT: @global_f32 ={{.*}} global <4 x float> zeroinitializer, align 8
 // CHECK-128-NEXT: @global_f64 ={{.*}} global <2 x double> zeroinitializer, align 8
+// CHECK-128-NEXT: @global_i8m2 ={{.*}} global <32 x i8> zeroinitializer, align 8
+// CHECK-128-NEXT: @global_i16m2 ={{.*}} global <16 x i16> zeroinitializer, align 8
+// CHECK-128-NEXT: @global_i32m2 ={{.*}} global <8 x i32> zeroinitializer, align 8
+// CHECK-128-NEXT: @global_i64m2 ={{.*}} global <4 x i64> zeroinitializer, align 8
+// CHECK-128-NEXT: @global_u8m2 ={{.*}} global <32 x i8> zeroinitializer, align 8
+// CHECK-128-NEXT: @global_u16m2 ={{.*}} global <16 x i16> zeroinitializer, align 8
+// CHECK-128-NEXT: @global_u32m2 ={{.*}} global <8 x i32> zeroinitializer, align 8
+// CHECK-128-NEXT: @global_u64m2 ={{.*}} global <4 x i64> zeroinitializer, align 8
+// CHECK-128-NEXT: @global_f32m2 ={{.*}} global <8 x float> zeroinitializer, align 8
+// CHECK-128-NEXT: @global_f64m2 ={{.*}} global <4 x double> zeroinitializer, align 8
+// CHECK-128-NEXT: @global_i8m4 ={{.*}} global <64 x i8> zeroinitializer, align 8
+// CHECK-128-NEXT: @global_i16m4 ={{.*}} global <32 x i16> zeroinitializer, align 8
+// CHECK-128-NEXT: @global_i32m4 ={{.*}} global <16 x i32> zeroinitializer, align 8
+// CHECK-128-NEXT: @global_i64m4 ={{.*}} global <8 x i64> zeroinitializer, align 8
+// CHECK-128-NEXT: @global_u8m4 ={{.*}} global <64 x i8> zeroinitializer, align 8
+// CHECK-128-NEXT: @global_u16m4 ={{.*}} global <32 x i16> zeroinitializer, align 8
+// CHECK-128-NEXT: @global_u32m4 ={{.*}} global <16 x i32> zeroinitializer, align 8
+// CHECK-128-NEXT: @global_u64m4 ={{.*}} global <8 x i64> zeroinitializer, align 8
+// CHECK-128-NEXT: @global_f32m4 ={{.*}} global <16 x float> zeroinitializer, align 8
+// CHECK-128-NEXT: @global_f64m4 ={{.*}} global <8 x double> zeroinitializer, align 8
+// CHECK-128-NEXT: @global_i8m8 ={{.*}} global <128 x i8> zeroinitializer, align 8
+// CHECK-128-NEXT: @global_i16m8 ={{.*}} global <64 x i16> zeroinitializer, align 8
+// CHECK-128-NEXT: @global_i32m8 ={{.*}} global <32 x i32> zeroinitializer, align 8
+// CHECK-128-NEXT: @global_i64m8 ={{.*}} global <16 x i64> zeroinitializer, align 8
+// CHECK-128-NEXT: @global_u8m8 ={{.*}} global <128 x i8> zeroinitializer, align 8
+// CHECK-128-NEXT: @global_u16m8 ={{.*}} global <64 x i16> zeroinitializer, align 8
+// CHECK-128-NEXT: @global_u32m8 ={{.*}} global <32 x i32> zeroinitializer, align 8
+// CHECK-128-NEXT: @global_u64m8 ={{.*}} global <16 x i64> zeroinitializer, align 8
+// CHECK-128-NEXT: @global_f32m8 ={{.*}} global <32 x float> zeroinitializer, align 8
+// CHECK-128-NEXT: @global_f64m8 ={{.*}} global <16 x double> zeroinitializer, align 8
 
 // CHECK-256:      @global_i8 ={{.*}} global <32 x i8> zeroinitializer, align 8
 // CHECK-256-NEXT: @global_i16 ={{.*}} global <16 x i16> zeroinitializer, align 8
@@ -277,6 +916,36 @@ void f() {
 // CHECK-256-NEXT: @global_u64 ={{.*}} global <4 x i64> zeroinitializer, align 8
 // CHECK-256-NEXT: @global_f32 ={{.*}} global <8 x float> zeroinitializer, align 8
 // CHECK-256-NEXT: @global_f64 ={{.*}} global <4 x double> zeroinitializer, align 8
+// CHECK-256-NEXT: @global_i8m2 ={{.*}} global <64 x i8> zeroinitializer, align 8
+// CHECK-256-NEXT: @global_i16m2 ={{.*}} global <32 x i16> zeroinitializer, align 8
+// CHECK-256-NEXT: @global_i32m2 ={{.*}} global <16 x i32> zeroinitializer, align 8
+// CHECK-256-NEXT: @global_i64m2 ={{.*}} global <8 x i64> zeroinitializer, align 8
+// CHECK-256-NEXT: @global_u8m2 ={{.*}} global <64 x i8> zeroinitializer, align 8
+// CHECK-256-NEXT: @global_u16m2 ={{.*}} global <32 x i16> zeroinitializer, align 8
+// CHECK-256-NEXT: @global_u32m2 ={{.*}} global <16 x i32> zeroinitializer, align 8
+// CHECK-256-NEXT: @global_u64m2 ={{.*}} global <8 x i64> zeroinitializer, align 8
+// CHECK-256-NEXT: @global_f32m2 ={{.*}} global <16 x float> zeroinitializer, align 8
+// CHECK-256-NEXT: @global_f64m2 ={{.*}} global <8 x double> zeroinitializer, align 8
+// CHECK-256-NEXT: @global_i8m4 ={{.*}} global <128 x i8> zeroinitializer, align 8
+// CHECK-256-NEXT: @global_i16m4 ={{.*}} global <64 x i16> zeroinitializer, align 8
+// CHECK-256-NEXT: @global_i32m4 ={{.*}} global <32 x i32> zeroinitializer, align 8
+// CHECK-256-NEXT: @global_i64m4 ={{.*}} global <16 x i64> zeroinitializer, align 8
+// CHECK-256-NEXT: @global_u8m4 ={{.*}} global <128 x i8> zeroinitializer, align 8
+// CHECK-256-NEXT: @global_u16m4 ={{.*}} global <64 x i16> zeroinitializer, align 8
+// CHECK-256-NEXT: @global_u32m4 ={{.*}} global <32 x i32> zeroinitializer, align 8
+// CHECK-256-NEXT: @global_u64m4 ={{.*}} global <16 x i64> zeroinitializer, align 8
+// CHECK-256-NEXT: @global_f32m4 ={{.*}} global <32 x float> zeroinitializer, align 8
+// CHECK-256-NEXT: @global_f64m4 ={{.*}} global <16 x double> zeroinitializer, align 8
+// CHECK-256-NEXT: @global_i8m8 ={{.*}} global <256 x i8> zeroinitializer, align 8
+// CHECK-256-NEXT: @global_i16m8 ={{.*}} global <128 x i16> zeroinitializer, align 8
+// CHECK-256-NEXT: @global_i32m8 ={{.*}} global <64 x i32> zeroinitializer, align 8
+// CHECK-256-NEXT: @global_i64m8 ={{.*}} global <32 x i64> zeroinitializer, align 8
+// CHECK-256-NEXT: @global_u8m8 ={{.*}} global <256 x i8> zeroinitializer, align 8
+// CHECK-256-NEXT: @global_u16m8 ={{.*}} global <128 x i16> zeroinitializer, align 8
+// CHECK-256-NEXT: @global_u32m8 ={{.*}} global <64 x i32> zeroinitializer, align 8
+// CHECK-256-NEXT: @global_u64m8 ={{.*}} global <32 x i64> zeroinitializer, align 8
+// CHECK-256-NEXT: @global_f32m8 ={{.*}} global <64 x float> zeroinitializer, align 8
+// CHECK-256-NEXT: @global_f64m8 ={{.*}} global <32 x double> zeroinitializer, align 8
 
 // CHECK-512:      @global_i8 ={{.*}} global <64 x i8> zeroinitializer, align 8
 // CHECK-512-NEXT: @global_i16 ={{.*}} global <32 x i16> zeroinitializer, align 8
@@ -288,6 +957,36 @@ void f() {
 // CHECK-512-NEXT: @global_u64 ={{.*}} global <8 x i64> zeroinitializer, align 8
 // CHECK-512-NEXT: @global_f32 ={{.*}} global <16 x float> zeroinitializer, align 8
 // CHECK-512-NEXT: @global_f64 ={{.*}} global <8 x double> zeroinitializer, align 8
+// CHECK-512-NEXT: @global_i8m2 ={{.*}} global <128 x i8> zeroinitializer, align 8
+// CHECK-512-NEXT: @global_i16m2 ={{.*}} global <64 x i16> zeroinitializer, align 8
+// CHECK-512-NEXT: @global_i32m2 ={{.*}} global <32 x i32> zeroinitializer, align 8
+// CHECK-512-NEXT: @global_i64m2 ={{.*}} global <16 x i64> zeroinitializer, align 8
+// CHECK-512-NEXT: @global_u8m2 ={{.*}} global <128 x i8> zeroinitializer, align 8
+// CHECK-512-NEXT: @global_u16m2 ={{.*}} global <64 x i16> zeroinitializer, align 8
+// CHECK-512-NEXT: @global_u32m2 ={{.*}} global <32 x i32> zeroinitializer, align 8
+// CHECK-512-NEXT: @global_u64m2 ={{.*}} global <16 x i64> zeroinitializer, align 8
+// CHECK-512-NEXT: @global_f32m2 ={{.*}} global <32 x float> zeroinitializer, align 8
+// CHECK-512-NEXT: @global_f64m2 ={{.*}} global <16 x double> zeroinitializer, align 8
+// CHECK-512-NEXT: @global_i8m4 ={{.*}} global <256 x i8> zeroinitializer, align 8
+// CHECK-512-NEXT: @global_i16m4 ={{.*}} global <128 x i16> zeroinitializer, align 8
+// CHECK-512-NEXT: @global_i32m4 ={{.*}} global <64 x i32> zeroinitializer, align 8
+// CHECK-512-NEXT: @global_i64m4 ={{.*}} global <32 x i64> zeroinitializer, align 8
+// CHECK-512-NEXT: @global_u8m4 ={{.*}} global <256 x i8> zeroinitializer, align 8
+// CHECK-512-NEXT: @global_u16m4 ={{.*}} global <128 x i16> zeroinitializer, align 8
+// CHECK-512-NEXT: @global_u32m4 ={{.*}} global <64 x i32> zeroinitializer, align 8
+// CHECK-512-NEXT: @global_u64m4 ={{.*}} global <32 x i64> zeroinitializer, align 8
+// CHECK-512-NEXT: @global_f32m4 ={{.*}} global <64 x float> zeroinitializer, align 8
+// CHECK-512-NEXT: @global_f64m4 ={{.*}} global <32 x double> zeroinitializer, align 8
+// CHECK-512-NEXT: @global_i8m8 ={{.*}} global <512 x i8> zeroinitializer, align 8
+// CHECK-512-NEXT: @global_i16m8 ={{.*}} global <256 x i16> zeroinitializer, align 8
+// CHECK-512-NEXT: @global_i32m8 ={{.*}} global <128 x i32> zeroinitializer, align 8
+// CHECK-512-NEXT: @global_i64m8 ={{.*}} global <64 x i64> zeroinitializer, align 8
+// CHECK-512-NEXT: @global_u8m8 ={{.*}} global <512 x i8> zeroinitializer, align 8
+// CHECK-512-NEXT: @global_u16m8 ={{.*}} global <256 x i16> zeroinitializer, align 8
+// CHECK-512-NEXT: @global_u32m8 ={{.*}} global <128 x i32> zeroinitializer, align 8
+// CHECK-512-NEXT: @global_u64m8 ={{.*}} global <64 x i64> zeroinitializer, align 8
+// CHECK-512-NEXT: @global_f32m8 ={{.*}} global <128 x float> zeroinitializer, align 8
+// CHECK-512-NEXT: @global_f64m8 ={{.*}} global <64 x double> zeroinitializer, align 8
 
 // CHECK-1024:      @global_i8 ={{.*}} global <128 x i8> zeroinitializer, align 8
 // CHECK-1024-NEXT: @global_i16 ={{.*}} global <64 x i16> zeroinitializer, align 8
@@ -299,6 +998,36 @@ void f() {
 // CHECK-1024-NEXT: @global_u64 ={{.*}} global <16 x i64> zeroinitializer, align 8
 // CHECK-1024-NEXT: @global_f32 ={{.*}} global <32 x float> zeroinitializer, align 8
 // CHECK-1024-NEXT: @global_f64 ={{.*}} global <16 x double> zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_i8m2 ={{.*}} global <256 x i8> zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_i16m2 ={{.*}} global <128 x i16> zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_i32m2 ={{.*}} global <64 x i32> zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_i64m2 ={{.*}} global <32 x i64> zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_u8m2 ={{.*}} global <256 x i8> zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_u16m2 ={{.*}} global <128 x i16> zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_u32m2 ={{.*}} global <64 x i32> zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_u64m2 ={{.*}} global <32 x i64> zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_f32m2 ={{.*}} global <64 x float> zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_f64m2 ={{.*}} global <32 x double> zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_i8m4 ={{.*}} global <512 x i8> zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_i16m4 ={{.*}} global <256 x i16> zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_i32m4 ={{.*}} global <128 x i32> zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_i64m4 ={{.*}} global <64 x i64> zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_u8m4 ={{.*}} global <512 x i8> zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_u16m4 ={{.*}} global <256 x i16> zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_u32m4 ={{.*}} global <128 x i32> zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_u64m4 ={{.*}} global <64 x i64> zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_f32m4 ={{.*}} global <128 x float> zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_f64m4 ={{.*}} global <64 x double> zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_i8m8 ={{.*}} global <1024 x i8> zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_i16m8 ={{.*}} global <512 x i16> zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_i32m8 ={{.*}} global <256 x i32> zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_i64m8 ={{.*}} global <128 x i64> zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_u8m8 ={{.*}} global <1024 x i8> zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_u16m8 ={{.*}} global <512 x i16> zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_u32m8 ={{.*}} global <256 x i32> zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_u64m8 ={{.*}} global <128 x i64> zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_f32m8 ={{.*}} global <256 x float> zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_f64m8 ={{.*}} global <128 x double> zeroinitializer, align 8
 
 //===----------------------------------------------------------------------===//
 // Global arrays
@@ -313,6 +1042,36 @@ void f() {
 // CHECK-64-NEXT: @global_arr_u64 ={{.*}} global [3 x <1 x i64>] zeroinitializer, align 8
 // CHECK-64-NEXT: @global_arr_f32 ={{.*}} global [3 x <2 x float>] zeroinitializer, align 8
 // CHECK-64-NEXT: @global_arr_f64 ={{.*}} global [3 x <1 x double>] zeroinitializer, align 8
+// CHECK-64-NEXT: @global_arr_i8m2 ={{.*}} global [3 x <16 x i8>] zeroinitializer, align 8
+// CHECK-64-NEXT: @global_arr_i16m2 ={{.*}} global [3 x <8 x i16>] zeroinitializer, align 8
+// CHECK-64-NEXT: @global_arr_i32m2 ={{.*}} global [3 x <4 x i32>] zeroinitializer, align 8
+// CHECK-64-NEXT: @global_arr_i64m2 ={{.*}} global [3 x <2 x i64>] zeroinitializer, align 8
+// CHECK-64-NEXT: @global_arr_u8m2 ={{.*}} global [3 x <16 x i8>] zeroinitializer, align 8
+// CHECK-64-NEXT: @global_arr_u16m2 ={{.*}} global [3 x <8 x i16>] zeroinitializer, align 8
+// CHECK-64-NEXT: @global_arr_u32m2 ={{.*}} global [3 x <4 x i32>] zeroinitializer, align 8
+// CHECK-64-NEXT: @global_arr_u64m2 ={{.*}} global [3 x <2 x i64>] zeroinitializer, align 8
+// CHECK-64-NEXT: @global_arr_f32m2 ={{.*}} global [3 x <4 x float>] zeroinitializer, align 8
+// CHECK-64-NEXT: @global_arr_f64m2 ={{.*}} global [3 x <2 x double>] zeroinitializer, align 8
+// CHECK-64-NEXT: @global_arr_i8m4 ={{.*}} global [3 x <32 x i8>] zeroinitializer, align 8
+// CHECK-64-NEXT: @global_arr_i16m4 ={{.*}} global [3 x <16 x i16>] zeroinitializer, align 8
+// CHECK-64-NEXT: @global_arr_i32m4 ={{.*}} global [3 x <8 x i32>] zeroinitializer, align 8
+// CHECK-64-NEXT: @global_arr_i64m4 ={{.*}} global [3 x <4 x i64>] zeroinitializer, align 8
+// CHECK-64-NEXT: @global_arr_u8m4 ={{.*}} global [3 x <32 x i8>] zeroinitializer, align 8
+// CHECK-64-NEXT: @global_arr_u16m4 ={{.*}} global [3 x <16 x i16>] zeroinitializer, align 8
+// CHECK-64-NEXT: @global_arr_u32m4 ={{.*}} global [3 x <8 x i32>] zeroinitializer, align 8
+// CHECK-64-NEXT: @global_arr_u64m4 ={{.*}} global [3 x <4 x i64>] zeroinitializer, align 8
+// CHECK-64-NEXT: @global_arr_f32m4 ={{.*}} global [3 x <8 x float>] zeroinitializer, align 8
+// CHECK-64-NEXT: @global_arr_f64m4 ={{.*}} global [3 x <4 x double>] zeroinitializer, align 8
+// CHECK-64-NEXT: @global_arr_i8m8 ={{.*}} global [3 x <64 x i8>] zeroinitializer, align 8
+// CHECK-64-NEXT: @global_arr_i16m8 ={{.*}} global [3 x <32 x i16>] zeroinitializer, align 8
+// CHECK-64-NEXT: @global_arr_i32m8 ={{.*}} global [3 x <16 x i32>] zeroinitializer, align 8
+// CHECK-64-NEXT: @global_arr_i64m8 ={{.*}} global [3 x <8 x i64>] zeroinitializer, align 8
+// CHECK-64-NEXT: @global_arr_u8m8 ={{.*}} global [3 x <64 x i8>] zeroinitializer, align 8
+// CHECK-64-NEXT: @global_arr_u16m8 ={{.*}} global [3 x <32 x i16>] zeroinitializer, align 8
+// CHECK-64-NEXT: @global_arr_u32m8 ={{.*}} global [3 x <16 x i32>] zeroinitializer, align 8
+// CHECK-64-NEXT: @global_arr_u64m8 ={{.*}} global [3 x <8 x i64>] zeroinitializer, align 8
+// CHECK-64-NEXT: @global_arr_f32m8 ={{.*}} global [3 x <16 x float>] zeroinitializer, align 8
+// CHECK-64-NEXT: @global_arr_f64m8 ={{.*}} global [3 x <8 x double>] zeroinitializer, align 8
 
 // CHECK-128:      @global_arr_i8 ={{.*}} global [3 x <16 x i8>] zeroinitializer, align 8
 // CHECK-128-NEXT: @global_arr_i16 ={{.*}} global [3 x <8 x i16>] zeroinitializer, align 8
@@ -324,6 +1083,36 @@ void f() {
 // CHECK-128-NEXT: @global_arr_u64 ={{.*}} global [3 x <2 x i64>] zeroinitializer, align 8
 // CHECK-128-NEXT: @global_arr_f32 ={{.*}} global [3 x <4 x float>] zeroinitializer, align 8
 // CHECK-128-NEXT: @global_arr_f64 ={{.*}} global [3 x <2 x double>] zeroinitializer, align 8
+// CHECK-128-NEXT: @global_arr_i8m2 ={{.*}} global [3 x <32 x i8>] zeroinitializer, align 8
+// CHECK-128-NEXT: @global_arr_i16m2 ={{.*}} global [3 x <16 x i16>] zeroinitializer, align 8
+// CHECK-128-NEXT: @global_arr_i32m2 ={{.*}} global [3 x <8 x i32>] zeroinitializer, align 8
+// CHECK-128-NEXT: @global_arr_i64m2 ={{.*}} global [3 x <4 x i64>] zeroinitializer, align 8
+// CHECK-128-NEXT: @global_arr_u8m2 ={{.*}} global [3 x <32 x i8>] zeroinitializer, align 8
+// CHECK-128-NEXT: @global_arr_u16m2 ={{.*}} global [3 x <16 x i16>] zeroinitializer, align 8
+// CHECK-128-NEXT: @global_arr_u32m2 ={{.*}} global [3 x <8 x i32>] zeroinitializer, align 8
+// CHECK-128-NEXT: @global_arr_u64m2 ={{.*}} global [3 x <4 x i64>] zeroinitializer, align 8
+// CHECK-128-NEXT: @global_arr_f32m2 ={{.*}} global [3 x <8 x float>] zeroinitializer, align 8
+// CHECK-128-NEXT: @global_arr_f64m2 ={{.*}} global [3 x <4 x double>] zeroinitializer, align 8
+// CHECK-128-NEXT: @global_arr_i8m4 ={{.*}} global [3 x <64 x i8>] zeroinitializer, align 8
+// CHECK-128-NEXT: @global_arr_i16m4 ={{.*}} global [3 x <32 x i16>] zeroinitializer, align 8
+// CHECK-128-NEXT: @global_arr_i32m4 ={{.*}} global [3 x <16 x i32>] zeroinitializer, align 8
+// CHECK-128-NEXT: @global_arr_i64m4 ={{.*}} global [3 x <8 x i64>] zeroinitializer, align 8
+// CHECK-128-NEXT: @global_arr_u8m4 ={{.*}} global [3 x <64 x i8>] zeroinitializer, align 8
+// CHECK-128-NEXT: @global_arr_u16m4 ={{.*}} global [3 x <32 x i16>] zeroinitializer, align 8
+// CHECK-128-NEXT: @global_arr_u32m4 ={{.*}} global [3 x <16 x i32>] zeroinitializer, align 8
+// CHECK-128-NEXT: @global_arr_u64m4 ={{.*}} global [3 x <8 x i64>] zeroinitializer, align 8
+// CHECK-128-NEXT: @global_arr_f32m4 ={{.*}} global [3 x <16 x float>] zeroinitializer, align 8
+// CHECK-128-NEXT: @global_arr_f64m4 ={{.*}} global [3 x <8 x double>] zeroinitializer, align 8
+// CHECK-128-NEXT: @global_arr_i8m8 ={{.*}} global [3 x <128 x i8>] zeroinitializer, align 8
+// CHECK-128-NEXT: @global_arr_i16m8 ={{.*}} global [3 x <64 x i16>] zeroinitializer, align 8
+// CHECK-128-NEXT: @global_arr_i32m8 ={{.*}} global [3 x <32 x i32>] zeroinitializer, align 8
+// CHECK-128-NEXT: @global_arr_i64m8 ={{.*}} global [3 x <16 x i64>] zeroinitializer, align 8
+// CHECK-128-NEXT: @global_arr_u8m8 ={{.*}} global [3 x <128 x i8>] zeroinitializer, align 8
+// CHECK-128-NEXT: @global_arr_u16m8 ={{.*}} global [3 x <64 x i16>] zeroinitializer, align 8
+// CHECK-128-NEXT: @global_arr_u32m8 ={{.*}} global [3 x <32 x i32>] zeroinitializer, align 8
+// CHECK-128-NEXT: @global_arr_u64m8 ={{.*}} global [3 x <16 x i64>] zeroinitializer, align 8
+// CHECK-128-NEXT: @global_arr_f32m8 ={{.*}} global [3 x <32 x float>] zeroinitializer, align 8
+// CHECK-128-NEXT: @global_arr_f64m8 ={{.*}} global [3 x <16 x double>] zeroinitializer, align 8
 
 // CHECK-256:      @global_arr_i8 ={{.*}} global [3 x <32 x i8>] zeroinitializer, align 8
 // CHECK-256-NEXT: @global_arr_i16 ={{.*}} global [3 x <16 x i16>] zeroinitializer, align 8
@@ -335,7 +1124,36 @@ void f() {
 // CHECK-256-NEXT: @global_arr_u64 ={{.*}} global [3 x <4 x i64>] zeroinitializer, align 8
 // CHECK-256-NEXT: @global_arr_f32 ={{.*}} global [3 x <8 x float>] zeroinitializer, align 8
 // CHECK-256-NEXT: @global_arr_f64 ={{.*}} global [3 x <4 x double>] zeroinitializer, align 8
-
+// CHECK-256-NEXT: @global_arr_i8m2 ={{.*}} global [3 x <64 x i8>] zeroinitializer, align 8
+// CHECK-256-NEXT: @global_arr_i16m2 ={{.*}} global [3 x <32 x i16>] zeroinitializer, align 8
+// CHECK-256-NEXT: @global_arr_i32m2 ={{.*}} global [3 x <16 x i32>] zeroinitializer, align 8
+// CHECK-256-NEXT: @global_arr_i64m2 ={{.*}} global [3 x <8 x i64>] zeroinitializer, align 8
+// CHECK-256-NEXT: @global_arr_u8m2 ={{.*}} global [3 x <64 x i8>] zeroinitializer, align 8
+// CHECK-256-NEXT: @global_arr_u16m2 ={{.*}} global [3 x <32 x i16>] zeroinitializer, align 8
+// CHECK-256-NEXT: @global_arr_u32m2 ={{.*}} global [3 x <16 x i32>] zeroinitializer, align 8
+// CHECK-256-NEXT: @global_arr_u64m2 ={{.*}} global [3 x <8 x i64>] zeroinitializer, align 8
+// CHECK-256-NEXT: @global_arr_f32m2 ={{.*}} global [3 x <16 x float>] zeroinitializer, align 8
+// CHECK-256-NEXT: @global_arr_f64m2 ={{.*}} global [3 x <8 x double>] zeroinitializer, align 8
+// CHECK-256-NEXT: @global_arr_i8m4 ={{.*}} global [3 x <128 x i8>] zeroinitializer, align 8
+// CHECK-256-NEXT: @global_arr_i16m4 ={{.*}} global [3 x <64 x i16>] zeroinitializer, align 8
+// CHECK-256-NEXT: @global_arr_i32m4 ={{.*}} global [3 x <32 x i32>] zeroinitializer, align 8
+// CHECK-256-NEXT: @global_arr_i64m4 ={{.*}} global [3 x <16 x i64>] zeroinitializer, align 8
+// CHECK-256-NEXT: @global_arr_u8m4 ={{.*}} global [3 x <128 x i8>] zeroinitializer, align 8
+// CHECK-256-NEXT: @global_arr_u16m4 ={{.*}} global [3 x <64 x i16>] zeroinitializer, align 8
+// CHECK-256-NEXT: @global_arr_u32m4 ={{.*}} global [3 x <32 x i32>] zeroinitializer, align 8
+// CHECK-256-NEXT: @global_arr_u64m4 ={{.*}} global [3 x <16 x i64>] zeroinitializer, align 8
+// CHECK-256-NEXT: @global_arr_f32m4 ={{.*}} global [3 x <32 x float>] zeroinitializer, align 8
+// CHECK-256-NEXT: @global_arr_f64m4 ={{.*}} global [3 x <16 x double>] zeroinitializer, align 8
+// CHECK-256-NEXT: @global_arr_i8m8 ={{.*}} global [3 x <256 x i8>] zeroinitializer, align 8
+// CHECK-256-NEXT: @global_arr_i16m8 ={{.*}} global [3 x <128 x i16>] zeroinitializer, align 8
+// CHECK-256-NEXT: @global_arr_i32m8 ={{.*}} global [3 x <64 x i32>] zeroinitializer, align 8
+// CHECK-256-NEXT: @global_arr_i64m8 ={{.*}} global [3 x <32 x i64>] zeroinitializer, align 8
+// CHECK-256-NEXT: @global_arr_u8m8 ={{.*}} global [3 x <256 x i8>] zeroinitializer, align 8
+// CHECK-256-NEXT: @global_arr_u16m8 ={{.*}} global [3 x <128 x i16>] zeroinitializer, align 8
+// CHECK-256-NEXT: @global_arr_u32m8 ={{.*}} global [3 x <64 x i32>] zeroinitializer, align 8
+// CHECK-256-NEXT: @global_arr_u64m8 ={{.*}} global [3 x <32 x i64>] zeroinitializer, align 8
+// CHECK-256-NEXT: @global_arr_f32m8 ={{.*}} global [3 x <64 x float>] zeroinitializer, align 8
+// CHECK-256-NEXT: @global_arr_f64m8 ={{.*}} global [3 x <32 x double>] zeroinitializer, align 8
 // CHECK-512:      @global_arr_i8 ={{.*}} global [3 x <64 x i8>] zeroinitializer, align 8
 // CHECK-512-NEXT: @global_arr_i16 ={{.*}} global [3 x <32 x i16>] zeroinitializer, align 8
 // CHECK-512-NEXT: @global_arr_i32 ={{.*}} global [3 x <16 x i32>] zeroinitializer, align 8
@@ -346,6 +1164,36 @@ void f() {
 // CHECK-512-NEXT: @global_arr_u64 ={{.*}} global [3 x <8 x i64>] zeroinitializer, align 8
 // CHECK-512-NEXT: @global_arr_f32 ={{.*}} global [3 x <16 x float>] zeroinitializer, align 8
 // CHECK-512-NEXT: @global_arr_f64 ={{.*}} global [3 x <8 x double>] zeroinitializer, align 8
+// CHECK-512-NEXT: @global_arr_i8m2 ={{.*}} global [3 x <128 x i8>] zeroinitializer, align 8
+// CHECK-512-NEXT: @global_arr_i16m2 ={{.*}} global [3 x <64 x i16>] zeroinitializer, align 8
+// CHECK-512-NEXT: @global_arr_i32m2 ={{.*}} global [3 x <32 x i32>] zeroinitializer, align 8
+// CHECK-512-NEXT: @global_arr_i64m2 ={{.*}} global [3 x <16 x i64>] zeroinitializer, align 8
+// CHECK-512-NEXT: @global_arr_u8m2 ={{.*}} global [3 x <128 x i8>] zeroinitializer, align 8
+// CHECK-512-NEXT: @global_arr_u16m2 ={{.*}} global [3 x <64 x i16>] zeroinitializer, align 8
+// CHECK-512-NEXT: @global_arr_u32m2 ={{.*}} global [3 x <32 x i32>] zeroinitializer, align 8
+// CHECK-512-NEXT: @global_arr_u64m2 ={{.*}} global [3 x <16 x i64>] zeroinitializer, align 8
+// CHECK-512-NEXT: @global_arr_f32m2 ={{.*}} global [3 x <32 x float>] zeroinitializer, align 8
+// CHECK-512-NEXT: @global_arr_f64m2 ={{.*}} global [3 x <16 x double>] zeroinitializer, align 8
+// CHECK-512-NEXT: @global_arr_i8m4 ={{.*}} global [3 x <256 x i8>] zeroinitializer, align 8
+// CHECK-512-NEXT: @global_arr_i16m4 ={{.*}} global [3 x <128 x i16>] zeroinitializer, align 8
+// CHECK-512-NEXT: @global_arr_i32m4 ={{.*}} global [3 x <64 x i32>] zeroinitializer, align 8
+// CHECK-512-NEXT: @global_arr_i64m4 ={{.*}} global [3 x <32 x i64>] zeroinitializer, align 8
+// CHECK-512-NEXT: @global_arr_u8m4 ={{.*}} global [3 x <256 x i8>] zeroinitializer, align 8
+// CHECK-512-NEXT: @global_arr_u16m4 ={{.*}} global [3 x <128 x i16>] zeroinitializer, align 8
+// CHECK-512-NEXT: @global_arr_u32m4 ={{.*}} global [3 x <64 x i32>] zeroinitializer, align 8
+// CHECK-512-NEXT: @global_arr_u64m4 ={{.*}} global [3 x <32 x i64>] zeroinitializer, align 8
+// CHECK-512-NEXT: @global_arr_f32m4 ={{.*}} global [3 x <64 x float>] zeroinitializer, align 8
+// CHECK-512-NEXT: @global_arr_f64m4 ={{.*}} global [3 x <32 x double>] zeroinitializer, align 8
+// CHECK-512-NEXT: @global_arr_i8m8 ={{.*}} global [3 x <512 x i8>] zeroinitializer, align 8
+// CHECK-512-NEXT: @global_arr_i16m8 ={{.*}} global [3 x <256 x i16>] zeroinitializer, align 8
+// CHECK-512-NEXT: @global_arr_i32m8 ={{.*}} global [3 x <128 x i32>] zeroinitializer, align 8
+// CHECK-512-NEXT: @global_arr_i64m8 ={{.*}} global [3 x <64 x i64>] zeroinitializer, align 8
+// CHECK-512-NEXT: @global_arr_u8m8 ={{.*}} global [3 x <512 x i8>] zeroinitializer, align 8
+// CHECK-512-NEXT: @global_arr_u16m8 ={{.*}} global [3 x <256 x i16>] zeroinitializer, align 8
+// CHECK-512-NEXT: @global_arr_u32m8 ={{.*}} global [3 x <128 x i32>] zeroinitializer, align 8
+// CHECK-512-NEXT: @global_arr_u64m8 ={{.*}} global [3 x <64 x i64>] zeroinitializer, align 8
+// CHECK-512-NEXT: @global_arr_f32m8 ={{.*}} global [3 x <128 x float>] zeroinitializer, align 8
+// CHECK-512-NEXT: @global_arr_f64m8 ={{.*}} global [3 x <64 x double>] zeroinitializer, align 8
 
 // CHECK-1024:      @global_arr_i8 ={{.*}} global [3 x <128 x i8>] zeroinitializer, align 8
 // CHECK-1024-NEXT: @global_arr_i16 ={{.*}} global [3 x <64 x i16>] zeroinitializer, align 8
@@ -357,6 +1205,36 @@ void f() {
 // CHECK-1024-NEXT: @global_arr_u64 ={{.*}} global [3 x <16 x i64>] zeroinitializer, align 8
 // CHECK-1024-NEXT: @global_arr_f32 ={{.*}} global [3 x <32 x float>] zeroinitializer, align 8
 // CHECK-1024-NEXT: @global_arr_f64 ={{.*}} global [3 x <16 x double>] zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_arr_i8m2 ={{.*}} global [3 x <256 x i8>] zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_arr_i16m2 ={{.*}} global [3 x <128 x i16>] zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_arr_i32m2 ={{.*}} global [3 x <64 x i32>] zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_arr_i64m2 ={{.*}} global [3 x <32 x i64>] zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_arr_u8m2 ={{.*}} global [3 x <256 x i8>] zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_arr_u16m2 ={{.*}} global [3 x <128 x i16>] zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_arr_u32m2 ={{.*}} global [3 x <64 x i32>] zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_arr_u64m2 ={{.*}} global [3 x <32 x i64>] zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_arr_f32m2 ={{.*}} global [3 x <64 x float>] zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_arr_f64m2 ={{.*}} global [3 x <32 x double>] zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_arr_i8m4 ={{.*}} global [3 x <512 x i8>] zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_arr_i16m4 ={{.*}} global [3 x <256 x i16>] zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_arr_i32m4 ={{.*}} global [3 x <128 x i32>] zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_arr_i64m4 ={{.*}} global [3 x <64 x i64>] zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_arr_u8m4 ={{.*}} global [3 x <512 x i8>] zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_arr_u16m4 ={{.*}} global [3 x <256 x i16>] zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_arr_u32m4 ={{.*}} global [3 x <128 x i32>] zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_arr_u64m4 ={{.*}} global [3 x <64 x i64>] zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_arr_f32m4 ={{.*}} global [3 x <128 x float>] zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_arr_f64m4 ={{.*}} global [3 x <64 x double>] zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_arr_i8m8 ={{.*}} global [3 x <1024 x i8>] zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_arr_i16m8 ={{.*}} global [3 x <512 x i16>] zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_arr_i32m8 ={{.*}} global [3 x <256 x i32>] zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_arr_i64m8 ={{.*}} global [3 x <128 x i64>] zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_arr_u8m8 ={{.*}} global [3 x <1024 x i8>] zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_arr_u16m8 ={{.*}} global [3 x <512 x i16>] zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_arr_u32m8 ={{.*}} global [3 x <256 x i32>] zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_arr_u64m8 ={{.*}} global [3 x <128 x i64>] zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_arr_f32m8 ={{.*}} global [3 x <256 x float>] zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_arr_f64m8 ={{.*}} global [3 x <128 x double>] zeroinitializer, align 8
 
 //===----------------------------------------------------------------------===//
 // Local variables
@@ -371,6 +1249,36 @@ void f() {
 // CHECK-64-NEXT: %local_u64 = alloca <1 x i64>, align 8
 // CHECK-64-NEXT: %local_f32 = alloca <2 x float>, align 8
 // CHECK-64-NEXT: %local_f64 = alloca <1 x double>, align 8
+// CHECK-64-NEXT: %local_i8m2 = alloca <16 x i8>, align 8
+// CHECK-64-NEXT: %local_i16m2 = alloca <8 x i16>, align 8
+// CHECK-64-NEXT: %local_i32m2 = alloca <4 x i32>, align 8
+// CHECK-64-NEXT: %local_i64m2 = alloca <2 x i64>, align 8
+// CHECK-64-NEXT: %local_u8m2 = alloca <16 x i8>, align 8
+// CHECK-64-NEXT: %local_u16m2 = alloca <8 x i16>, align 8
+// CHECK-64-NEXT: %local_u32m2 = alloca <4 x i32>, align 8
+// CHECK-64-NEXT: %local_u64m2 = alloca <2 x i64>, align 8
+// CHECK-64-NEXT: %local_f32m2 = alloca <4 x float>, align 8
+// CHECK-64-NEXT: %local_f64m2 = alloca <2 x double>, align 8
+// CHECK-64-NEXT: %local_i8m4 = alloca <32 x i8>, align 8
+// CHECK-64-NEXT: %local_i16m4 = alloca <16 x i16>, align 8
+// CHECK-64-NEXT: %local_i32m4 = alloca <8 x i32>, align 8
+// CHECK-64-NEXT: %local_i64m4 = alloca <4 x i64>, align 8
+// CHECK-64-NEXT: %local_u8m4 = alloca <32 x i8>, align 8
+// CHECK-64-NEXT: %local_u16m4 = alloca <16 x i16>, align 8
+// CHECK-64-NEXT: %local_u32m4 = alloca <8 x i32>, align 8
+// CHECK-64-NEXT: %local_u64m4 = alloca <4 x i64>, align 8
+// CHECK-64-NEXT: %local_f32m4 = alloca <8 x float>, align 8
+// CHECK-64-NEXT: %local_f64m4 = alloca <4 x double>, align 8
+// CHECK-64-NEXT: %local_i8m8 = alloca <64 x i8>, align 8
+// CHECK-64-NEXT: %local_i16m8 = alloca <32 x i16>, align 8
+// CHECK-64-NEXT: %local_i32m8 = alloca <16 x i32>, align 8
+// CHECK-64-NEXT: %local_i64m8 = alloca <8 x i64>, align 8
+// CHECK-64-NEXT: %local_u8m8 = alloca <64 x i8>, align 8
+// CHECK-64-NEXT: %local_u16m8 = alloca <32 x i16>, align 8
+// CHECK-64-NEXT: %local_u32m8 = alloca <16 x i32>, align 8
+// CHECK-64-NEXT: %local_u64m8 = alloca <8 x i64>, align 8
+// CHECK-64-NEXT: %local_f32m8 = alloca <16 x float>, align 8
+// CHECK-64-NEXT: %local_f64m8 = alloca <8 x double>, align 8
 
 // CHECK-128:      %local_i8 = alloca <16 x i8>, align 8
 // CHECK-128-NEXT: %local_i16 = alloca <8 x i16>, align 8
@@ -382,6 +1290,36 @@ void f() {
 // CHECK-128-NEXT: %local_u64 = alloca <2 x i64>, align 8
 // CHECK-128-NEXT: %local_f32 = alloca <4 x float>, align 8
 // CHECK-128-NEXT: %local_f64 = alloca <2 x double>, align 8
+// CHECK-128-NEXT: %local_i8m2 = alloca <32 x i8>, align 8
+// CHECK-128-NEXT: %local_i16m2 = alloca <16 x i16>, align 8
+// CHECK-128-NEXT: %local_i32m2 = alloca <8 x i32>, align 8
+// CHECK-128-NEXT: %local_i64m2 = alloca <4 x i64>, align 8
+// CHECK-128-NEXT: %local_u8m2 = alloca <32 x i8>, align 8
+// CHECK-128-NEXT: %local_u16m2 = alloca <16 x i16>, align 8
+// CHECK-128-NEXT: %local_u32m2 = alloca <8 x i32>, align 8
+// CHECK-128-NEXT: %local_u64m2 = alloca <4 x i64>, align 8
+// CHECK-128-NEXT: %local_f32m2 = alloca <8 x float>, align 8
+// CHECK-128-NEXT: %local_f64m2 = alloca <4 x double>, align 8
+// CHECK-128-NEXT: %local_i8m4 = alloca <64 x i8>, align 8
+// CHECK-128-NEXT: %local_i16m4 = alloca <32 x i16>, align 8
+// CHECK-128-NEXT: %local_i32m4 = alloca <16 x i32>, align 8
+// CHECK-128-NEXT: %local_i64m4 = alloca <8 x i64>, align 8
+// CHECK-128-NEXT: %local_u8m4 = alloca <64 x i8>, align 8
+// CHECK-128-NEXT: %local_u16m4 = alloca <32 x i16>, align 8
+// CHECK-128-NEXT: %local_u32m4 = alloca <16 x i32>, align 8
+// CHECK-128-NEXT: %local_u64m4 = alloca <8 x i64>, align 8
+// CHECK-128-NEXT: %local_f32m4 = alloca <16 x float>, align 8
+// CHECK-128-NEXT: %local_f64m4 = alloca <8 x double>, align 8
+// CHECK-128-NEXT: %local_i8m8 = alloca <128 x i8>, align 8
+// CHECK-128-NEXT: %local_i16m8 = alloca <64 x i16>, align 8
+// CHECK-128-NEXT: %local_i32m8 = alloca <32 x i32>, align 8
+// CHECK-128-NEXT: %local_i64m8 = alloca <16 x i64>, align 8
+// CHECK-128-NEXT: %local_u8m8 = alloca <128 x i8>, align 8
+// CHECK-128-NEXT: %local_u16m8 = alloca <64 x i16>, align 8
+// CHECK-128-NEXT: %local_u32m8 = alloca <32 x i32>, align 8
+// CHECK-128-NEXT: %local_u64m8 = alloca <16 x i64>, align 8
+// CHECK-128-NEXT: %local_f32m8 = alloca <32 x float>, align 8
+// CHECK-128-NEXT: %local_f64m8 = alloca <16 x double>, align 8
 
 // CHECK-256:      %local_i8 = alloca <32 x i8>, align 8
 // CHECK-256-NEXT: %local_i16 = alloca <16 x i16>, align 8
@@ -393,6 +1331,36 @@ void f() {
 // CHECK-256-NEXT: %local_u64 = alloca <4 x i64>, align 8
 // CHECK-256-NEXT: %local_f32 = alloca <8 x float>, align 8
 // CHECK-256-NEXT: %local_f64 = alloca <4 x double>, align 8
+// CHECK-256-NEXT: %local_i8m2 = alloca <64 x i8>, align 8
+// CHECK-256-NEXT: %local_i16m2 = alloca <32 x i16>, align 8
+// CHECK-256-NEXT: %local_i32m2 = alloca <16 x i32>, align 8
+// CHECK-256-NEXT: %local_i64m2 = alloca <8 x i64>, align 8
+// CHECK-256-NEXT: %local_u8m2 = alloca <64 x i8>, align 8
+// CHECK-256-NEXT: %local_u16m2 = alloca <32 x i16>, align 8
+// CHECK-256-NEXT: %local_u32m2 = alloca <16 x i32>, align 8
+// CHECK-256-NEXT: %local_u64m2 = alloca <8 x i64>, align 8
+// CHECK-256-NEXT: %local_f32m2 = alloca <16 x float>, align 8
+// CHECK-256-NEXT: %local_f64m2 = alloca <8 x double>, align 8
+// CHECK-256-NEXT: %local_i8m4 = alloca <128 x i8>, align 8
+// CHECK-256-NEXT: %local_i16m4 = alloca <64 x i16>, align 8
+// CHECK-256-NEXT: %local_i32m4 = alloca <32 x i32>, align 8
+// CHECK-256-NEXT: %local_i64m4 = alloca <16 x i64>, align 8
+// CHECK-256-NEXT: %local_u8m4 = alloca <128 x i8>, align 8
+// CHECK-256-NEXT: %local_u16m4 = alloca <64 x i16>, align 8
+// CHECK-256-NEXT: %local_u32m4 = alloca <32 x i32>, align 8
+// CHECK-256-NEXT: %local_u64m4 = alloca <16 x i64>, align 8
+// CHECK-256-NEXT: %local_f32m4 = alloca <32 x float>, align 8
+// CHECK-256-NEXT: %local_f64m4 = alloca <16 x double>, align 8
+// CHECK-256-NEXT: %local_i8m8 = alloca <256 x i8>, align 8
+// CHECK-256-NEXT: %local_i16m8 = alloca <128 x i16>, align 8
+// CHECK-256-NEXT: %local_i32m8 = alloca <64 x i32>, align 8
+// CHECK-256-NEXT: %local_i64m8 = alloca <32 x i64>, align 8
+// CHECK-256-NEXT: %local_u8m8 = alloca <256 x i8>, align 8
+// CHECK-256-NEXT: %local_u16m8 = alloca <128 x i16>, align 8
+// CHECK-256-NEXT: %local_u32m8 = alloca <64 x i32>, align 8
+// CHECK-256-NEXT: %local_u64m8 = alloca <32 x i64>, align 8
+// CHECK-256-NEXT: %local_f32m8 = alloca <64 x float>, align 8
+// CHECK-256-NEXT: %local_f64m8 = alloca <32 x double>, align 8
 
 // CHECK-512:      %local_i8 = alloca <64 x i8>, align 8
 // CHECK-512-NEXT: %local_i16 = alloca <32 x i16>, align 8
@@ -404,6 +1372,36 @@ void f() {
 // CHECK-512-NEXT: %local_u64 = alloca <8 x i64>, align 8
 // CHECK-512-NEXT: %local_f32 = alloca <16 x float>, align 8
 // CHECK-512-NEXT: %local_f64 = alloca <8 x double>, align 8
+// CHECK-512-NEXT: %local_i8m2 = alloca <128 x i8>, align 8
+// CHECK-512-NEXT: %local_i16m2 = alloca <64 x i16>, align 8
+// CHECK-512-NEXT: %local_i32m2 = alloca <32 x i32>, align 8
+// CHECK-512-NEXT: %local_i64m2 = alloca <16 x i64>, align 8
+// CHECK-512-NEXT: %local_u8m2 = alloca <128 x i8>, align 8
+// CHECK-512-NEXT: %local_u16m2 = alloca <64 x i16>, align 8
+// CHECK-512-NEXT: %local_u32m2 = alloca <32 x i32>, align 8
+// CHECK-512-NEXT: %local_u64m2 = alloca <16 x i64>, align 8
+// CHECK-512-NEXT: %local_f32m2 = alloca <32 x float>, align 8
+// CHECK-512-NEXT: %local_f64m2 = alloca <16 x double>, align 8
+// CHECK-512-NEXT: %local_i8m4 = alloca <256 x i8>, align 8
+// CHECK-512-NEXT: %local_i16m4 = alloca <128 x i16>, align 8
+// CHECK-512-NEXT: %local_i32m4 = alloca <64 x i32>, align 8
+// CHECK-512-NEXT: %local_i64m4 = alloca <32 x i64>, align 8
+// CHECK-512-NEXT: %local_u8m4 = alloca <256 x i8>, align 8
+// CHECK-512-NEXT: %local_u16m4 = alloca <128 x i16>, align 8
+// CHECK-512-NEXT: %local_u32m4 = alloca <64 x i32>, align 8
+// CHECK-512-NEXT: %local_u64m4 = alloca <32 x i64>, align 8
+// CHECK-512-NEXT: %local_f32m4 = alloca <64 x float>, align 8
+// CHECK-512-NEXT: %local_f64m4 = alloca <32 x double>, align 8
+// CHECK-512-NEXT: %local_i8m8 = alloca <512 x i8>, align 8
+// CHECK-512-NEXT: %local_i16m8 = alloca <256 x i16>, align 8
+// CHECK-512-NEXT: %local_i32m8 = alloca <128 x i32>, align 8
+// CHECK-512-NEXT: %local_i64m8 = alloca <64 x i64>, align 8
+// CHECK-512-NEXT: %local_u8m8 = alloca <512 x i8>, align 8
+// CHECK-512-NEXT: %local_u16m8 = alloca <256 x i16>, align 8
+// CHECK-512-NEXT: %local_u32m8 = alloca <128 x i32>, align 8
+// CHECK-512-NEXT: %local_u64m8 = alloca <64 x i64>, align 8
+// CHECK-512-NEXT: %local_f32m8 = alloca <128 x float>, align 8
+// CHECK-512-NEXT: %local_f64m8 = alloca <64 x double>, align 8
 
 // CHECK-1024:       %local_i8 = alloca <128 x i8>, align 8
 // CHECK-1024-NEXT:  %local_i16 = alloca <64 x i16>, align 8
@@ -415,6 +1413,36 @@ void f() {
 // CHECK-1024-NEXT:  %local_u64 = alloca <16 x i64>, align 8
 // CHECK-1024-NEXT:  %local_f32 = alloca <32 x float>, align 8
 // CHECK-1024-NEXT:  %local_f64 = alloca <16 x double>, align 8
+// CHECK-1024-NEXT:  %local_i8m2 = alloca <256 x i8>, align 8
+// CHECK-1024-NEXT:  %local_i16m2 = alloca <128 x i16>, align 8
+// CHECK-1024-NEXT:  %local_i32m2 = alloca <64 x i32>, align 8
+// CHECK-1024-NEXT:  %local_i64m2 = alloca <32 x i64>, align 8
+// CHECK-1024-NEXT:  %local_u8m2 = alloca <256 x i8>, align 8
+// CHECK-1024-NEXT:  %local_u16m2 = alloca <128 x i16>, align 8
+// CHECK-1024-NEXT:  %local_u32m2 = alloca <64 x i32>, align 8
+// CHECK-1024-NEXT:  %local_u64m2 = alloca <32 x i64>, align 8
+// CHECK-1024-NEXT:  %local_f32m2 = alloca <64 x float>, align 8
+// CHECK-1024-NEXT:  %local_f64m2 = alloca <32 x double>, align 8
+// CHECK-1024-NEXT:  %local_i8m4 = alloca <512 x i8>, align 8
+// CHECK-1024-NEXT:  %local_i16m4 = alloca <256 x i16>, align 8
+// CHECK-1024-NEXT:  %local_i32m4 = alloca <128 x i32>, align 8
+// CHECK-1024-NEXT:  %local_i64m4 = alloca <64 x i64>, align 8
+// CHECK-1024-NEXT:  %local_u8m4 = alloca <512 x i8>, align 8
+// CHECK-1024-NEXT:  %local_u16m4 = alloca <256 x i16>, align 8
+// CHECK-1024-NEXT:  %local_u32m4 = alloca <128 x i32>, align 8
+// CHECK-1024-NEXT:  %local_u64m4 = alloca <64 x i64>, align 8
+// CHECK-1024-NEXT:  %local_f32m4 = alloca <128 x float>, align 8
+// CHECK-1024-NEXT:  %local_f64m4 = alloca <64 x double>, align 8
+// CHECK-1024-NEXT:  %local_i8m8 = alloca <1024 x i8>, align 8
+// CHECK-1024-NEXT:  %local_i16m8 = alloca <512 x i16>, align 8
+// CHECK-1024-NEXT:  %local_i32m8 = alloca <256 x i32>, align 8
+// CHECK-1024-NEXT:  %local_i64m8 = alloca <128 x i64>, align 8
+// CHECK-1024-NEXT:  %local_u8m8 = alloca <1024 x i8>, align 8
+// CHECK-1024-NEXT:  %local_u16m8 = alloca <512 x i16>, align 8
+// CHECK-1024-NEXT:  %local_u32m8 = alloca <256 x i32>, align 8
+// CHECK-1024-NEXT:  %local_u64m8 = alloca <128 x i64>, align 8
+// CHECK-1024-NEXT:  %local_f32m8 = alloca <256 x float>, align 8
+// CHECK-1024-NEXT:  %local_f64m8 = alloca <128 x double>, align 8
 
 //===----------------------------------------------------------------------===//
 // Local arrays
@@ -429,6 +1457,36 @@ void f() {
 // CHECK-64-NEXT: %local_arr_u64 = alloca [3 x <1 x i64>], align 8
 // CHECK-64-NEXT: %local_arr_f32 = alloca [3 x <2 x float>], align 8
 // CHECK-64-NEXT: %local_arr_f64 = alloca [3 x <1 x double>], align 8
+// CHECK-64-NEXT: %local_arr_i8m2 = alloca [3 x <16 x i8>], align 8
+// CHECK-64-NEXT: %local_arr_i16m2 = alloca [3 x <8 x i16>], align 8
+// CHECK-64-NEXT: %local_arr_i32m2 = alloca [3 x <4 x i32>], align 8
+// CHECK-64-NEXT: %local_arr_i64m2 = alloca [3 x <2 x i64>], align 8
+// CHECK-64-NEXT: %local_arr_u8m2 = alloca [3 x <16 x i8>], align 8
+// CHECK-64-NEXT: %local_arr_u16m2 = alloca [3 x <8 x i16>], align 8
+// CHECK-64-NEXT: %local_arr_u32m2 = alloca [3 x <4 x i32>], align 8
+// CHECK-64-NEXT: %local_arr_u64m2 = alloca [3 x <2 x i64>], align 8
+// CHECK-64-NEXT: %local_arr_f32m2 = alloca [3 x <4 x float>], align 8
+// CHECK-64-NEXT: %local_arr_f64m2 = alloca [3 x <2 x double>], align 8
+// CHECK-64-NEXT: %local_arr_i8m4 = alloca [3 x <32 x i8>], align 8
+// CHECK-64-NEXT: %local_arr_i16m4 = alloca [3 x <16 x i16>], align 8
+// CHECK-64-NEXT: %local_arr_i32m4 = alloca [3 x <8 x i32>], align 8
+// CHECK-64-NEXT: %local_arr_i64m4 = alloca [3 x <4 x i64>], align 8
+// CHECK-64-NEXT: %local_arr_u8m4 = alloca [3 x <32 x i8>], align 8
+// CHECK-64-NEXT: %local_arr_u16m4 = alloca [3 x <16 x i16>], align 8
+// CHECK-64-NEXT: %local_arr_u32m4 = alloca [3 x <8 x i32>], align 8
+// CHECK-64-NEXT: %local_arr_u64m4 = alloca [3 x <4 x i64>], align 8
+// CHECK-64-NEXT: %local_arr_f32m4 = alloca [3 x <8 x float>], align 8
+// CHECK-64-NEXT: %local_arr_f64m4 = alloca [3 x <4 x double>], align 8
+// CHECK-64-NEXT: %local_arr_i8m8 = alloca [3 x <64 x i8>], align 8
+// CHECK-64-NEXT: %local_arr_i16m8 = alloca [3 x <32 x i16>], align 8
+// CHECK-64-NEXT: %local_arr_i32m8 = alloca [3 x <16 x i32>], align 8
+// CHECK-64-NEXT: %local_arr_i64m8 = alloca [3 x <8 x i64>], align 8
+// CHECK-64-NEXT: %local_arr_u8m8 = alloca [3 x <64 x i8>], align 8
+// CHECK-64-NEXT: %local_arr_u16m8 = alloca [3 x <32 x i16>], align 8
+// CHECK-64-NEXT: %local_arr_u32m8 = alloca [3 x <16 x i32>], align 8
+// CHECK-64-NEXT: %local_arr_u64m8 = alloca [3 x <8 x i64>], align 8
+// CHECK-64-NEXT: %local_arr_f32m8 = alloca [3 x <16 x float>], align 8
+// CHECK-64-NEXT: %local_arr_f64m8 = alloca [3 x <8 x double>], align 8
 
 // CHECK-128:      %local_arr_i8 = alloca [3 x <16 x i8>], align 8
 // CHECK-128-NEXT: %local_arr_i16 = alloca [3 x <8 x i16>], align 8
@@ -440,6 +1498,36 @@ void f() {
 // CHECK-128-NEXT: %local_arr_u64 = alloca [3 x <2 x i64>], align 8
 // CHECK-128-NEXT: %local_arr_f32 = alloca [3 x <4 x float>], align 8
 // CHECK-128-NEXT: %local_arr_f64 = alloca [3 x <2 x double>], align 8
+// CHECK-128-NEXT: %local_arr_i8m2 = alloca [3 x <32 x i8>], align 8
+// CHECK-128-NEXT: %local_arr_i16m2 = alloca [3 x <16 x i16>], align 8
+// CHECK-128-NEXT: %local_arr_i32m2 = alloca [3 x <8 x i32>], align 8
+// CHECK-128-NEXT: %local_arr_i64m2 = alloca [3 x <4 x i64>], align 8
+// CHECK-128-NEXT: %local_arr_u8m2 = alloca [3 x <32 x i8>], align 8
+// CHECK-128-NEXT: %local_arr_u16m2 = alloca [3 x <16 x i16>], align 8
+// CHECK-128-NEXT: %local_arr_u32m2 = alloca [3 x <8 x i32>], align 8
+// CHECK-128-NEXT: %local_arr_u64m2 = alloca [3 x <4 x i64>], align 8
+// CHECK-128-NEXT: %local_arr_f32m2 = alloca [3 x <8 x float>], align 8
+// CHECK-128-NEXT: %local_arr_f64m2 = alloca [3 x <4 x double>], align 8
+// CHECK-128-NEXT: %local_arr_i8m4 = alloca [3 x <64 x i8>], align 8
+// CHECK-128-NEXT: %local_arr_i16m4 = alloca [3 x <32 x i16>], align 8
+// CHECK-128-NEXT: %local_arr_i32m4 = alloca [3 x <16 x i32>], align 8
+// CHECK-128-NEXT: %local_arr_i64m4 = alloca [3 x <8 x i64>], align 8
+// CHECK-128-NEXT: %local_arr_u8m4 = alloca [3 x <64 x i8>], align 8
+// CHECK-128-NEXT: %local_arr_u16m4 = alloca [3 x <32 x i16>], align 8
+// CHECK-128-NEXT: %local_arr_u32m4 = alloca [3 x <16 x i32>], align 8
+// CHECK-128-NEXT: %local_arr_u64m4 = alloca [3 x <8 x i64>], align 8
+// CHECK-128-NEXT: %local_arr_f32m4 = alloca [3 x <16 x float>], align 8
+// CHECK-128-NEXT: %local_arr_f64m4 = alloca [3 x <8 x double>], align 8
+// CHECK-128-NEXT: %local_arr_i8m8 = alloca [3 x <128 x i8>], align 8
+// CHECK-128-NEXT: %local_arr_i16m8 = alloca [3 x <64 x i16>], align 8
+// CHECK-128-NEXT: %local_arr_i32m8 = alloca [3 x <32 x i32>], align 8
+// CHECK-128-NEXT: %local_arr_i64m8 = alloca [3 x <16 x i64>], align 8
+// CHECK-128-NEXT: %local_arr_u8m8 = alloca [3 x <128 x i8>], align 8
+// CHECK-128-NEXT: %local_arr_u16m8 = alloca [3 x <64 x i16>], align 8
+// CHECK-128-NEXT: %local_arr_u32m8 = alloca [3 x <32 x i32>], align 8
+// CHECK-128-NEXT: %local_arr_u64m8 = alloca [3 x <16 x i64>], align 8
+// CHECK-128-NEXT: %local_arr_f32m8 = alloca [3 x <32 x float>], align 8
+// CHECK-128-NEXT: %local_arr_f64m8 = alloca [3 x <16 x double>], align 8
 
 // CHECK-256:      %local_arr_i8 = alloca [3 x <32 x i8>], align 8
 // CHECK-256-NEXT: %local_arr_i16 = alloca [3 x <16 x i16>], align 8
@@ -451,6 +1539,36 @@ void f() {
 // CHECK-256-NEXT: %local_arr_u64 = alloca [3 x <4 x i64>], align 8
 // CHECK-256-NEXT: %local_arr_f32 = alloca [3 x <8 x float>], align 8
 // CHECK-256-NEXT: %local_arr_f64 = alloca [3 x <4 x double>], align 8
+// CHECK-256-NEXT: %local_arr_i8m2 = alloca [3 x <64 x i8>], align 8
+// CHECK-256-NEXT: %local_arr_i16m2 = alloca [3 x <32 x i16>], align 8
+// CHECK-256-NEXT: %local_arr_i32m2 = alloca [3 x <16 x i32>], align 8
+// CHECK-256-NEXT: %local_arr_i64m2 = alloca [3 x <8 x i64>], align 8
+// CHECK-256-NEXT: %local_arr_u8m2 = alloca [3 x <64 x i8>], align 8
+// CHECK-256-NEXT: %local_arr_u16m2 = alloca [3 x <32 x i16>], align 8
+// CHECK-256-NEXT: %local_arr_u32m2 = alloca [3 x <16 x i32>], align 8
+// CHECK-256-NEXT: %local_arr_u64m2 = alloca [3 x <8 x i64>], align 8
+// CHECK-256-NEXT: %local_arr_f32m2 = alloca [3 x <16 x float>], align 8
+// CHECK-256-NEXT: %local_arr_f64m2 = alloca [3 x <8 x double>], align 8
+// CHECK-256-NEXT: %local_arr_i8m4 = alloca [3 x <128 x i8>], align 8
+// CHECK-256-NEXT: %local_arr_i16m4 = alloca [3 x <64 x i16>], align 8
+// CHECK-256-NEXT: %local_arr_i32m4 = alloca [3 x <32 x i32>], align 8
+// CHECK-256-NEXT: %local_arr_i64m4 = alloca [3 x <16 x i64>], align 8
+// CHECK-256-NEXT: %local_arr_u8m4 = alloca [3 x <128 x i8>], align 8
+// CHECK-256-NEXT: %local_arr_u16m4 = alloca [3 x <64 x i16>], align 8
+// CHECK-256-NEXT: %local_arr_u32m4 = alloca [3 x <32 x i32>], align 8
+// CHECK-256-NEXT: %local_arr_u64m4 = alloca [3 x <16 x i64>], align 8
+// CHECK-256-NEXT: %local_arr_f32m4 = alloca [3 x <32 x float>], align 8
+// CHECK-256-NEXT: %local_arr_f64m4 = alloca [3 x <16 x double>], align 8
+// CHECK-256-NEXT: %local_arr_i8m8 = alloca [3 x <256 x i8>], align 8
+// CHECK-256-NEXT: %local_arr_i16m8 = alloca [3 x <128 x i16>], align 8
+// CHECK-256-NEXT: %local_arr_i32m8 = alloca [3 x <64 x i32>], align 8
+// CHECK-256-NEXT: %local_arr_i64m8 = alloca [3 x <32 x i64>], align 8
+// CHECK-256-NEXT: %local_arr_u8m8 = alloca [3 x <256 x i8>], align 8
+// CHECK-256-NEXT: %local_arr_u16m8 = alloca [3 x <128 x i16>], align 8
+// CHECK-256-NEXT: %local_arr_u32m8 = alloca [3 x <64 x i32>], align 8
+// CHECK-256-NEXT: %local_arr_u64m8 = alloca [3 x <32 x i64>], align 8
+// CHECK-256-NEXT: %local_arr_f32m8 = alloca [3 x <64 x float>], align 8
+// CHECK-256-NEXT: %local_arr_f64m8 = alloca [3 x <32 x double>], align 8
 
 // CHECK-512:      %local_arr_i8 = alloca [3 x <64 x i8>], align 8
 // CHECK-512-NEXT: %local_arr_i16 = alloca [3 x <32 x i16>], align 8
@@ -462,6 +1580,36 @@ void f() {
 // CHECK-512-NEXT: %local_arr_u64 = alloca [3 x <8 x i64>], align 8
 // CHECK-512-NEXT: %local_arr_f32 = alloca [3 x <16 x float>], align 8
 // CHECK-512-NEXT: %local_arr_f64 = alloca [3 x <8 x double>], align 8
+// CHECK-512-NEXT: %local_arr_i8m2 = alloca [3 x <128 x i8>], align 8
+// CHECK-512-NEXT: %local_arr_i16m2 = alloca [3 x <64 x i16>], align 8
+// CHECK-512-NEXT: %local_arr_i32m2 = alloca [3 x <32 x i32>], align 8
+// CHECK-512-NEXT: %local_arr_i64m2 = alloca [3 x <16 x i64>], align 8
+// CHECK-512-NEXT: %local_arr_u8m2 = alloca [3 x <128 x i8>], align 8
+// CHECK-512-NEXT: %local_arr_u16m2 = alloca [3 x <64 x i16>], align 8
+// CHECK-512-NEXT: %local_arr_u32m2 = alloca [3 x <32 x i32>], align 8
+// CHECK-512-NEXT: %local_arr_u64m2 = alloca [3 x <16 x i64>], align 8
+// CHECK-512-NEXT: %local_arr_f32m2 = alloca [3 x <32 x float>], align 8
+// CHECK-512-NEXT: %local_arr_f64m2 = alloca [3 x <16 x double>], align 8
+// CHECK-512-NEXT: %local_arr_i8m4 = alloca [3 x <256 x i8>], align 8
+// CHECK-512-NEXT: %local_arr_i16m4 = alloca [3 x <128 x i16>], align 8
+// CHECK-512-NEXT: %local_arr_i32m4 = alloca [3 x <64 x i32>], align 8
+// CHECK-512-NEXT: %local_arr_i64m4 = alloca [3 x <32 x i64>], align 8
+// CHECK-512-NEXT: %local_arr_u8m4 = alloca [3 x <256 x i8>], align 8
+// CHECK-512-NEXT: %local_arr_u16m4 = alloca [3 x <128 x i16>], align 8
+// CHECK-512-NEXT: %local_arr_u32m4 = alloca [3 x <64 x i32>], align 8
+// CHECK-512-NEXT: %local_arr_u64m4 = alloca [3 x <32 x i64>], align 8
+// CHECK-512-NEXT: %local_arr_f32m4 = alloca [3 x <64 x float>], align 8
+// CHECK-512-NEXT: %local_arr_f64m4 = alloca [3 x <32 x double>], align 8
+// CHECK-512-NEXT: %local_arr_i8m8 = alloca [3 x <512 x i8>], align 8
+// CHECK-512-NEXT: %local_arr_i16m8 = alloca [3 x <256 x i16>], align 8
+// CHECK-512-NEXT: %local_arr_i32m8 = alloca [3 x <128 x i32>], align 8
+// CHECK-512-NEXT: %local_arr_i64m8 = alloca [3 x <64 x i64>], align 8
+// CHECK-512-NEXT: %local_arr_u8m8 = alloca [3 x <512 x i8>], align 8
+// CHECK-512-NEXT: %local_arr_u16m8 = alloca [3 x <256 x i16>], align 8
+// CHECK-512-NEXT: %local_arr_u32m8 = alloca [3 x <128 x i32>], align 8
+// CHECK-512-NEXT: %local_arr_u64m8 = alloca [3 x <64 x i64>], align 8
+// CHECK-512-NEXT: %local_arr_f32m8 = alloca [3 x <128 x float>], align 8
+// CHECK-512-NEXT: %local_arr_f64m8 = alloca [3 x <64 x double>], align 8
 
 // CHECK-1024:       %local_arr_i8 = alloca [3 x <128 x i8>], align 8
 // CHECK-1024-NEXT:  %local_arr_i16 = alloca [3 x <64 x i16>], align 8
@@ -473,3 +1621,33 @@ void f() {
 // CHECK-1024-NEXT:  %local_arr_u64 = alloca [3 x <16 x i64>], align 8
 // CHECK-1024-NEXT:  %local_arr_f32 = alloca [3 x <32 x float>], align 8
 // CHECK-1024-NEXT:  %local_arr_f64 = alloca [3 x <16 x double>], align 8
+// CHECK-1024-NEXT:  %local_arr_i8m2 = alloca [3 x <256 x i8>], align 8
+// CHECK-1024-NEXT:  %local_arr_i16m2 = alloca [3 x <128 x i16>], align 8
+// CHECK-1024-NEXT:  %local_arr_i32m2 = alloca [3 x <64 x i32>], align 8
+// CHECK-1024-NEXT:  %local_arr_i64m2 = alloca [3 x <32 x i64>], align 8
+// CHECK-1024-NEXT:  %local_arr_u8m2 = alloca [3 x <256 x i8>], align 8
+// CHECK-1024-NEXT:  %local_arr_u16m2 = alloca [3 x <128 x i16>], align 8
+// CHECK-1024-NEXT:  %local_arr_u32m2 = alloca [3 x <64 x i32>], align 8
+// CHECK-1024-NEXT:  %local_arr_u64m2 = alloca [3 x <32 x i64>], align 8
+// CHECK-1024-NEXT:  %local_arr_f32m2 = alloca [3 x <64 x float>], align 8
+// CHECK-1024-NEXT:  %local_arr_f64m2 = alloca [3 x <32 x double>], align 8
+// CHECK-1024-NEXT:  %local_arr_i8m4 = alloca [3 x <512 x i8>], align 8
+// CHECK-1024-NEXT:  %local_arr_i16m4 = alloca [3 x <256 x i16>], align 8
+// CHECK-1024-NEXT:  %local_arr_i32m4 = alloca [3 x <128 x i32>], align 8
+// CHECK-1024-NEXT:  %local_arr_i64m4 = alloca [3 x <64 x i64>], align 8
+// CHECK-1024-NEXT:  %local_arr_u8m4 = alloca [3 x <512 x i8>], align 8
+// CHECK-1024-NEXT:  %local_arr_u16m4 = alloca [3 x <256 x i16>], align 8
+// CHECK-1024-NEXT:  %local_arr_u32m4 = alloca [3 x <128 x i32>], align 8
+// CHECK-1024-NEXT:  %local_arr_u64m4 = alloca [3 x <64 x i64>], align 8
+// CHECK-1024-NEXT:  %local_arr_f32m4 = alloca [3 x <128 x float>], align 8
+// CHECK-1024-NEXT:  %local_arr_f64m4 = alloca [3 x <64 x double>], align 8
+// CHECK-1024-NEXT:  %local_arr_i8m8 = alloca [3 x <1024 x i8>], align 8
+// CHECK-1024-NEXT:  %local_arr_i16m8 = alloca [3 x <512 x i16>], align 8
+// CHECK-1024-NEXT:  %local_arr_i32m8 = alloca [3 x <256 x i32>], align 8
+// CHECK-1024-NEXT:  %local_arr_i64m8 = alloca [3 x <128 x i64>], align 8
+// CHECK-1024-NEXT:  %local_arr_u8m8 = alloca [3 x <1024 x i8>], align 8
+// CHECK-1024-NEXT:  %local_arr_u16m8 = alloca [3 x <512 x i16>], align 8
+// CHECK-1024-NEXT:  %local_arr_u32m8 = alloca [3 x <256 x i32>], align 8
+// CHECK-1024-NEXT:  %local_arr_u64m8 = alloca [3 x <128 x i64>], align 8
+// CHECK-1024-NEXT:  %local_arr_f32m8 = alloca [3 x <256 x float>], align 8
+// CHECK-1024-NEXT:  %local_arr_f64m8 = alloca [3 x <128 x double>], align 8

diff  --git a/clang/test/Driver/riscv-rvv-vector-bits.c b/clang/test/Driver/riscv-rvv-vector-bits.c
index 273d692708ff9..7baee6092ac8d 100644
--- a/clang/test/Driver/riscv-rvv-vector-bits.c
+++ b/clang/test/Driver/riscv-rvv-vector-bits.c
@@ -63,4 +63,4 @@ typedef vint32m1_t noflag __attribute__((riscv_rvv_vector_bits(256)));
 
 typedef vint32_t bad_vector_size __attribute__((riscv_rvv_vector_bits(256)));
 
-// CHECK-BAD-VECTOR-SIZE-ERROR: error: invalid RVV vector size '256', must match value set by '-mrvv-vector-bits' ('128')
+// CHECK-BAD-VECTOR-SIZE-ERROR: error: invalid RVV vector size '256', expected size is '128' based on LMUL of type and '-mrvv-vector-bits'

diff  --git a/clang/test/Sema/attr-riscv-rvv-vector-bits.c b/clang/test/Sema/attr-riscv-rvv-vector-bits.c
index ebc556a830313..7e4aa2110e104 100644
--- a/clang/test/Sema/attr-riscv-rvv-vector-bits.c
+++ b/clang/test/Sema/attr-riscv-rvv-vector-bits.c
@@ -75,6 +75,26 @@ typedef __rvv_float32m8_t vfloat32m8_t;
 typedef __rvv_float64m8_t vfloat64m8_t;
 
 // Define valid fixed-width RVV types
+typedef vint8mf8_t fixed_int8mf8_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen / 8)));
+
+typedef vuint8mf8_t fixed_uint8mf8_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen / 8)));
+
+typedef vint8mf4_t fixed_int8mf4_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen / 4)));
+typedef vint16mf4_t fixed_int16mf4_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen / 4)));
+
+typedef vuint8mf4_t fixed_uint8mf4_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen / 4)));
+typedef vuint16mf4_t fixed_uint16mf4_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen / 4)));
+
+typedef vint8mf2_t fixed_int8mf2_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen / 2)));
+typedef vint16mf2_t fixed_int16mf2_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen / 2)));
+typedef vint32mf2_t fixed_int32mf2_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen / 2)));
+
+typedef vuint8mf2_t fixed_uint8mf2_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen / 2)));
+typedef vuint16mf2_t fixed_uint16mf2_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen / 2)));
+typedef vuint32mf2_t fixed_uint32mf2_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen / 2)));
+
+typedef vfloat32mf2_t fixed_float32mf2_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen / 2)));
+
 typedef vint8m1_t fixed_int8m1_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen)));
 typedef vint16m1_t fixed_int16m1_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen)));
 typedef vint32m1_t fixed_int32m1_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen)));
@@ -88,7 +108,66 @@ typedef vuint64m1_t fixed_uint64m1_t __attribute__((riscv_rvv_vector_bits(__risc
 typedef vfloat32m1_t fixed_float32m1_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen)));
 typedef vfloat64m1_t fixed_float64m1_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen)));
 
+typedef vint8m2_t fixed_int8m2_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 2)));
+typedef vint16m2_t fixed_int16m2_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 2)));
+typedef vint32m2_t fixed_int32m2_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 2)));
+typedef vint64m2_t fixed_int64m2_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 2)));
+
+typedef vuint8m2_t fixed_uint8m2_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 2)));
+typedef vuint16m2_t fixed_uint16m2_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 2)));
+typedef vuint32m2_t fixed_uint32m2_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 2)));
+typedef vuint64m2_t fixed_uint64m2_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 2)));
+
+typedef vfloat32m2_t fixed_float32m2_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 2)));
+typedef vfloat64m2_t fixed_float64m2_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 2)));
+
+typedef vint8m4_t fixed_int8m4_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 4)));
+typedef vint16m4_t fixed_int16m4_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 4)));
+typedef vint32m4_t fixed_int32m4_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 4)));
+typedef vint64m4_t fixed_int64m4_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 4)));
+
+typedef vuint8m4_t fixed_uint8m4_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 4)));
+typedef vuint16m4_t fixed_uint16m4_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 4)));
+typedef vuint32m4_t fixed_uint32m4_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 4)));
+typedef vuint64m4_t fixed_uint64m4_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 4)));
+
+typedef vfloat32m4_t fixed_float32m4_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 4)));
+typedef vfloat64m4_t fixed_float64m4_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 4)));
+
+typedef vint8m8_t fixed_int8m8_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 8)));
+typedef vint16m8_t fixed_int16m8_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 8)));
+typedef vint32m8_t fixed_int32m8_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 8)));
+typedef vint64m8_t fixed_int64m8_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 8)));
+
+typedef vuint8m8_t fixed_uint8m8_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 8)));
+typedef vuint16m8_t fixed_uint16m8_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 8)));
+typedef vuint32m8_t fixed_uint32m8_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 8)));
+typedef vuint64m8_t fixed_uint64m8_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 8)));
+
+typedef vfloat32m8_t fixed_float32m8_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 8)));
+typedef vfloat64m8_t fixed_float64m8_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 8)));
+
 // GNU vector types
+typedef int8_t gnu_int8mf8_t __attribute__((vector_size(__riscv_v_fixed_vlen / 64)));
+
+typedef uint8_t gnu_uint8mf8_t __attribute__((vector_size(__riscv_v_fixed_vlen / 64)));
+
+typedef int8_t gnu_int8mf4_t __attribute__((vector_size(__riscv_v_fixed_vlen / 32)));
+typedef int16_t gnu_int16mf4_t __attribute__((vector_size(__riscv_v_fixed_vlen / 32)));
+
+typedef uint8_t gnu_uint8mf4_t __attribute__((vector_size(__riscv_v_fixed_vlen / 32)));
+typedef uint16_t gnu_uint16mf4_t __attribute__((vector_size(__riscv_v_fixed_vlen / 32)));
+
+typedef int8_t gnu_int8mf2_t __attribute__((vector_size(__riscv_v_fixed_vlen / 16)));
+typedef int16_t gnu_int16mf2_t __attribute__((vector_size(__riscv_v_fixed_vlen / 16)));
+typedef int32_t gnu_int32mf2_t __attribute__((vector_size(__riscv_v_fixed_vlen / 16)));
+
+typedef uint8_t gnu_uint8mf2_t __attribute__((vector_size(__riscv_v_fixed_vlen / 16)));
+typedef uint16_t gnu_uint16mf2_t __attribute__((vector_size(__riscv_v_fixed_vlen / 16)));
+typedef uint32_t gnu_uint32mf2_t __attribute__((vector_size(__riscv_v_fixed_vlen / 16)));
+
+typedef float gnu_float32mf2_t __attribute__((vector_size(__riscv_v_fixed_vlen / 16)));
+
 typedef int8_t gnu_int8m1_t __attribute__((vector_size(__riscv_v_fixed_vlen / 8)));
 typedef int16_t gnu_int16m1_t __attribute__((vector_size(__riscv_v_fixed_vlen / 8)));
 typedef int32_t gnu_int32m1_t __attribute__((vector_size(__riscv_v_fixed_vlen / 8)));
@@ -102,7 +181,44 @@ typedef uint64_t gnu_uint64m1_t __attribute__((vector_size(__riscv_v_fixed_vlen
 typedef float gnu_float32m1_t __attribute__((vector_size(__riscv_v_fixed_vlen / 8)));
 typedef double gnu_float64m1_t __attribute__((vector_size(__riscv_v_fixed_vlen / 8)));
 
-typedef int32_t gnu_int32m2_t __attribute__((vector_size((__riscv_v_fixed_vlen * 2) / 8)));
+typedef int8_t gnu_int8m2_t __attribute__((vector_size(__riscv_v_fixed_vlen / 4)));
+typedef int16_t gnu_int16m2_t __attribute__((vector_size(__riscv_v_fixed_vlen / 4)));
+typedef int32_t gnu_int32m2_t __attribute__((vector_size(__riscv_v_fixed_vlen / 4)));
+typedef int64_t gnu_int64m2_t __attribute__((vector_size(__riscv_v_fixed_vlen / 4)));
+
+typedef uint8_t gnu_uint8m2_t __attribute__((vector_size(__riscv_v_fixed_vlen / 4)));
+typedef uint16_t gnu_uint16m2_t __attribute__((vector_size(__riscv_v_fixed_vlen / 4)));
+typedef uint32_t gnu_uint32m2_t __attribute__((vector_size(__riscv_v_fixed_vlen / 4)));
+typedef uint64_t gnu_uint64m2_t __attribute__((vector_size(__riscv_v_fixed_vlen / 4)));
+
+typedef float gnu_float32m2_t __attribute__((vector_size(__riscv_v_fixed_vlen / 4)));
+typedef double gnu_float64m2_t __attribute__((vector_size(__riscv_v_fixed_vlen / 4)));
+
+typedef int8_t gnu_int8m4_t __attribute__((vector_size(__riscv_v_fixed_vlen / 2)));
+typedef int16_t gnu_int16m4_t __attribute__((vector_size(__riscv_v_fixed_vlen / 2)));
+typedef int32_t gnu_int32m4_t __attribute__((vector_size(__riscv_v_fixed_vlen / 2)));
+typedef int64_t gnu_int64m4_t __attribute__((vector_size(__riscv_v_fixed_vlen / 2)));
+
+typedef uint8_t gnu_uint8m4_t __attribute__((vector_size(__riscv_v_fixed_vlen / 2)));
+typedef uint16_t gnu_uint16m4_t __attribute__((vector_size(__riscv_v_fixed_vlen / 2)));
+typedef uint32_t gnu_uint32m4_t __attribute__((vector_size(__riscv_v_fixed_vlen / 2)));
+typedef uint64_t gnu_uint64m4_t __attribute__((vector_size(__riscv_v_fixed_vlen / 2)));
+
+typedef float gnu_float32m4_t __attribute__((vector_size(__riscv_v_fixed_vlen / 2)));
+typedef double gnu_float64m4_t __attribute__((vector_size(__riscv_v_fixed_vlen / 2)));
+
+typedef int8_t gnu_int8m8_t __attribute__((vector_size(__riscv_v_fixed_vlen)));
+typedef int16_t gnu_int16m8_t __attribute__((vector_size(__riscv_v_fixed_vlen)));
+typedef int32_t gnu_int32m8_t __attribute__((vector_size(__riscv_v_fixed_vlen)));
+typedef int64_t gnu_int64m8_t __attribute__((vector_size(__riscv_v_fixed_vlen)));
+
+typedef uint8_t gnu_uint8m8_t __attribute__((vector_size(__riscv_v_fixed_vlen)));
+typedef uint16_t gnu_uint16m8_t __attribute__((vector_size(__riscv_v_fixed_vlen)));
+typedef uint32_t gnu_uint32m8_t __attribute__((vector_size(__riscv_v_fixed_vlen)));
+typedef uint64_t gnu_uint64m8_t __attribute__((vector_size(__riscv_v_fixed_vlen)));
+
+typedef float gnu_float32m8_t __attribute__((vector_size(__riscv_v_fixed_vlen)));
+typedef double gnu_float64m8_t __attribute__((vector_size(__riscv_v_fixed_vlen)));
 
 // Attribute must have a single argument
 typedef vint8m1_t no_argument __attribute__((riscv_rvv_vector_bits));         // expected-error {{'riscv_rvv_vector_bits' attribute takes one argument}}
@@ -114,12 +230,6 @@ typedef vint8m1_t non_int_size2 __attribute__((riscv_rvv_vector_bits("256"))); /
 
 // bool types and LMUL != 1 are not supported.
 typedef vbool1_t fixed_vbool1_t_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen))); // expected-error {{'riscv_rvv_vector_bits' attribute applied to non-RVV type 'vbool1_t'}}
-typedef vint8mf8_t fixed_int8mf8_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen / 8))); // expected-error {{'riscv_rvv_vector_bits' attribute applied to non-RVV type 'vint8mf8_t'}}
-typedef vint8mf4_t fixed_int8mf4_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen / 4))); // expected-error {{'riscv_rvv_vector_bits' attribute applied to non-RVV type 'vint8mf4_t'}}
-typedef vint8mf2_t fixed_int8mf2_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen / 2))); // expected-error {{'riscv_rvv_vector_bits' attribute applied to non-RVV type 'vint8mf2_t'}}
-typedef vint8m2_t fixed_int8m2_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 2))); // expected-error {{'riscv_rvv_vector_bits' attribute applied to non-RVV type 'vint8m2_t'}}
-typedef vint8m4_t fixed_int8m4_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 4))); // expected-error {{'riscv_rvv_vector_bits' attribute applied to non-RVV type 'vint8m4_t'}}
-typedef vint8m8_t fixed_int8m8_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 8))); // expected-error {{'riscv_rvv_vector_bits' attribute applied to non-RVV type 'vint8m8_t'}}
 
 // Attribute must be attached to a single RVV vector or predicate type.
 typedef void *badtype1 __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen)));         // expected-error {{'riscv_rvv_vector_bits' attribute applied to non-RVV type 'void *'}}
@@ -216,6 +326,26 @@ void f(int c) {
 
 #define VECTOR_SIZE ((__riscv_v_fixed_vlen / 8))
 
+_Static_assert(sizeof(fixed_int8mf8_t) == VECTOR_SIZE / 8, "");
+
+_Static_assert(sizeof(fixed_uint8mf8_t) == VECTOR_SIZE / 8, "");
+
+_Static_assert(sizeof(fixed_int8mf4_t) == VECTOR_SIZE / 4, "");
+_Static_assert(sizeof(fixed_int16mf4_t) == VECTOR_SIZE / 4, "");
+
+_Static_assert(sizeof(fixed_uint8mf4_t) == VECTOR_SIZE / 4, "");
+_Static_assert(sizeof(fixed_uint16mf4_t) == VECTOR_SIZE / 4, "");
+
+_Static_assert(sizeof(fixed_int8mf2_t) == VECTOR_SIZE / 2, "");
+_Static_assert(sizeof(fixed_int16mf2_t) == VECTOR_SIZE / 2, "");
+_Static_assert(sizeof(fixed_int32mf2_t) == VECTOR_SIZE / 2, "");
+
+_Static_assert(sizeof(fixed_uint8mf2_t) == VECTOR_SIZE / 2, "");
+_Static_assert(sizeof(fixed_uint16mf2_t) == VECTOR_SIZE / 2, "");
+_Static_assert(sizeof(fixed_uint32mf2_t) == VECTOR_SIZE / 2, "");
+
+_Static_assert(sizeof(fixed_float32mf2_t) == VECTOR_SIZE / 2, "");
+
 _Static_assert(sizeof(fixed_int8m1_t) == VECTOR_SIZE, "");
 _Static_assert(sizeof(fixed_int16m1_t) == VECTOR_SIZE, "");
 _Static_assert(sizeof(fixed_int32m1_t) == VECTOR_SIZE, "");
@@ -229,11 +359,70 @@ _Static_assert(sizeof(fixed_int64m1_t) == VECTOR_SIZE, "");
 _Static_assert(sizeof(fixed_float32m1_t) == VECTOR_SIZE, "");
 _Static_assert(sizeof(fixed_float64m1_t) == VECTOR_SIZE, "");
 
+_Static_assert(sizeof(fixed_int8m2_t) == VECTOR_SIZE * 2, "");
+_Static_assert(sizeof(fixed_int16m2_t) == VECTOR_SIZE * 2, "");
+_Static_assert(sizeof(fixed_int32m2_t) == VECTOR_SIZE * 2, "");
+_Static_assert(sizeof(fixed_int64m2_t) == VECTOR_SIZE * 2, "");
+
+_Static_assert(sizeof(fixed_uint8m2_t) == VECTOR_SIZE * 2, "");
+_Static_assert(sizeof(fixed_uint16m2_t) == VECTOR_SIZE * 2, "");
+_Static_assert(sizeof(fixed_uint32m2_t) == VECTOR_SIZE * 2, "");
+_Static_assert(sizeof(fixed_uint64m2_t) == VECTOR_SIZE * 2, "");
+
+_Static_assert(sizeof(fixed_float32m2_t) == VECTOR_SIZE * 2, "");
+_Static_assert(sizeof(fixed_float64m2_t) == VECTOR_SIZE * 2, "");
+
+_Static_assert(sizeof(fixed_int8m4_t) == VECTOR_SIZE * 4, "");
+_Static_assert(sizeof(fixed_int16m4_t) == VECTOR_SIZE * 4, "");
+_Static_assert(sizeof(fixed_int32m4_t) == VECTOR_SIZE * 4, "");
+_Static_assert(sizeof(fixed_int64m4_t) == VECTOR_SIZE * 4, "");
+
+_Static_assert(sizeof(fixed_uint8m4_t) == VECTOR_SIZE * 4, "");
+_Static_assert(sizeof(fixed_uint16m4_t) == VECTOR_SIZE * 4, "");
+_Static_assert(sizeof(fixed_uint32m4_t) == VECTOR_SIZE * 4, "");
+_Static_assert(sizeof(fixed_uint64m4_t) == VECTOR_SIZE * 4, "");
+
+_Static_assert(sizeof(fixed_float32m4_t) == VECTOR_SIZE * 4, "");
+_Static_assert(sizeof(fixed_float64m4_t) == VECTOR_SIZE * 4, "");
+
+_Static_assert(sizeof(fixed_int8m8_t) == VECTOR_SIZE * 8, "");
+_Static_assert(sizeof(fixed_int16m8_t) == VECTOR_SIZE * 8, "");
+_Static_assert(sizeof(fixed_int32m8_t) == VECTOR_SIZE * 8, "");
+_Static_assert(sizeof(fixed_int64m8_t) == VECTOR_SIZE * 8, "");
+
+_Static_assert(sizeof(fixed_uint8m8_t) == VECTOR_SIZE * 8, "");
+_Static_assert(sizeof(fixed_uint16m8_t) == VECTOR_SIZE * 8, "");
+_Static_assert(sizeof(fixed_uint32m8_t) == VECTOR_SIZE * 8, "");
+_Static_assert(sizeof(fixed_uint64m8_t) == VECTOR_SIZE * 8, "");
+
+_Static_assert(sizeof(fixed_float32m8_t) == VECTOR_SIZE * 8, "");
+_Static_assert(sizeof(fixed_float64m8_t) == VECTOR_SIZE * 8, "");
+
 // --------------------------------------------------------------------------//
 // Alignof
 
 #define VECTOR_ALIGN 8
 
+_Static_assert(__alignof__(fixed_int8mf8_t) == VECTOR_ALIGN, "");
+
+_Static_assert(__alignof__(fixed_uint8mf8_t) == VECTOR_ALIGN, "");
+
+_Static_assert(__alignof__(fixed_int8mf4_t) == VECTOR_ALIGN, "");
+_Static_assert(__alignof__(fixed_int16mf4_t) == VECTOR_ALIGN, "");
+
+_Static_assert(__alignof__(fixed_uint8mf4_t) == VECTOR_ALIGN, "");
+_Static_assert(__alignof__(fixed_uint16mf4_t) == VECTOR_ALIGN, "");
+
+_Static_assert(__alignof__(fixed_int8mf2_t) == VECTOR_ALIGN, "");
+_Static_assert(__alignof__(fixed_int16mf2_t) == VECTOR_ALIGN, "");
+_Static_assert(__alignof__(fixed_int32mf2_t) == VECTOR_ALIGN, "");
+
+_Static_assert(__alignof__(fixed_uint8mf2_t) == VECTOR_ALIGN, "");
+_Static_assert(__alignof__(fixed_uint16mf2_t) == VECTOR_ALIGN, "");
+_Static_assert(__alignof__(fixed_uint32mf2_t) == VECTOR_ALIGN, "");
+
+_Static_assert(__alignof__(fixed_float32mf2_t) == VECTOR_ALIGN, "");
+
 _Static_assert(__alignof__(fixed_int8m1_t) == VECTOR_ALIGN, "");
 _Static_assert(__alignof__(fixed_int16m1_t) == VECTOR_ALIGN, "");
 _Static_assert(__alignof__(fixed_int32m1_t) == VECTOR_ALIGN, "");
@@ -247,17 +436,74 @@ _Static_assert(__alignof__(fixed_uint64m1_t) == VECTOR_ALIGN, "");
 _Static_assert(__alignof__(fixed_float32m1_t) == VECTOR_ALIGN, "");
 _Static_assert(__alignof__(fixed_float64m1_t) == VECTOR_ALIGN, "");
 
+_Static_assert(__alignof__(fixed_int8m2_t) == VECTOR_ALIGN, "");
+_Static_assert(__alignof__(fixed_int16m2_t) == VECTOR_ALIGN, "");
+_Static_assert(__alignof__(fixed_int32m2_t) == VECTOR_ALIGN, "");
+_Static_assert(__alignof__(fixed_int64m2_t) == VECTOR_ALIGN, "");
+
+_Static_assert(__alignof__(fixed_uint8m2_t) == VECTOR_ALIGN, "");
+_Static_assert(__alignof__(fixed_uint16m2_t) == VECTOR_ALIGN, "");
+_Static_assert(__alignof__(fixed_uint32m2_t) == VECTOR_ALIGN, "");
+_Static_assert(__alignof__(fixed_uint64m2_t) == VECTOR_ALIGN, "");
+
+_Static_assert(__alignof__(fixed_float32m2_t) == VECTOR_ALIGN, "");
+_Static_assert(__alignof__(fixed_float64m2_t) == VECTOR_ALIGN, "");
+
+_Static_assert(__alignof__(fixed_int8m4_t) == VECTOR_ALIGN, "");
+_Static_assert(__alignof__(fixed_int16m4_t) == VECTOR_ALIGN, "");
+_Static_assert(__alignof__(fixed_int32m4_t) == VECTOR_ALIGN, "");
+_Static_assert(__alignof__(fixed_int64m4_t) == VECTOR_ALIGN, "");
+
+_Static_assert(__alignof__(fixed_uint8m4_t) == VECTOR_ALIGN, "");
+_Static_assert(__alignof__(fixed_uint16m4_t) == VECTOR_ALIGN, "");
+_Static_assert(__alignof__(fixed_uint32m4_t) == VECTOR_ALIGN, "");
+_Static_assert(__alignof__(fixed_uint64m4_t) == VECTOR_ALIGN, "");
+
+_Static_assert(__alignof__(fixed_float32m4_t) == VECTOR_ALIGN, "");
+_Static_assert(__alignof__(fixed_float64m4_t) == VECTOR_ALIGN, "");
+
+_Static_assert(__alignof__(fixed_int8m8_t) == VECTOR_ALIGN, "");
+_Static_assert(__alignof__(fixed_int16m8_t) == VECTOR_ALIGN, "");
+_Static_assert(__alignof__(fixed_int32m8_t) == VECTOR_ALIGN, "");
+_Static_assert(__alignof__(fixed_int64m8_t) == VECTOR_ALIGN, "");
+
+_Static_assert(__alignof__(fixed_uint8m8_t) == VECTOR_ALIGN, "");
+_Static_assert(__alignof__(fixed_uint16m8_t) == VECTOR_ALIGN, "");
+_Static_assert(__alignof__(fixed_uint32m8_t) == VECTOR_ALIGN, "");
+_Static_assert(__alignof__(fixed_uint64m8_t) == VECTOR_ALIGN, "");
+
+_Static_assert(__alignof__(fixed_float32m8_t) == VECTOR_ALIGN, "");
+_Static_assert(__alignof__(fixed_float64m8_t) == VECTOR_ALIGN, "");
+
 // --------------------------------------------------------------------------//
 // Structs
 
 struct struct_int64 { fixed_int64m1_t x, y[5]; };
 struct struct_float64 { fixed_float64m1_t x, y[5]; };
 
+struct struct_int64m2 { fixed_int64m2_t x, y[5]; };
+struct struct_float64m2 { fixed_float64m2_t x, y[5]; };
+
+struct struct_int64m4 { fixed_int64m4_t x, y[5]; };
+struct struct_float64m4 { fixed_float64m4_t x, y[5]; };
+
+struct struct_int64m8 { fixed_int64m8_t x, y[5]; };
+struct struct_float64m8 { fixed_float64m8_t x, y[5]; };
+
 // --------------------------------------------------------------------------//
 // Unions
 union union_int64 { fixed_int64m1_t x, y[5]; };
 union union_float64 { fixed_float64m1_t x, y[5]; };
 
+union union_int64m2 { fixed_int64m2_t x, y[5]; };
+union union_float64m2 { fixed_float64m2_t x, y[5]; };
+
+union union_int64m4 { fixed_int64m4_t x, y[5]; };
+union union_float64m4 { fixed_float64m4_t x, y[5]; };
+
+union union_int64m8 { fixed_int64m8_t x, y[5]; };
+union union_float64m8 { fixed_float64m8_t x, y[5]; };
+
 // --------------------------------------------------------------------------//
 // Implicit casts
 
@@ -274,6 +520,22 @@ union union_float64 { fixed_float64m1_t x, y[5]; };
   TEST_CAST_GNU(v, TYPE)      \
   TEST_CAST_GNU(fixed_, TYPE)
 
+TEST_CAST_VECTOR(int8mf8)
+TEST_CAST_VECTOR(uint8mf8)
+
+TEST_CAST_VECTOR(int8mf4)
+TEST_CAST_VECTOR(int16mf4)
+TEST_CAST_VECTOR(uint8mf4)
+TEST_CAST_VECTOR(uint16mf4)
+
+TEST_CAST_VECTOR(int8mf2)
+TEST_CAST_VECTOR(int16mf2)
+TEST_CAST_VECTOR(int32mf2)
+TEST_CAST_VECTOR(uint8mf2)
+TEST_CAST_VECTOR(uint16mf2)
+TEST_CAST_VECTOR(uint32mf2)
+TEST_CAST_VECTOR(float32mf2)
+
 TEST_CAST_VECTOR(int8m1)
 TEST_CAST_VECTOR(int16m1)
 TEST_CAST_VECTOR(int32m1)
@@ -285,10 +547,38 @@ TEST_CAST_VECTOR(uint64m1)
 TEST_CAST_VECTOR(float32m1)
 TEST_CAST_VECTOR(float64m1)
 
-// Test that casts only work for LMUL=1 types and don't crash.
-vint32m2_t to_vint32m2_t_from_gnut(gnu_int32m2_t x) { return x; } // expected-error-re {{returning 'gnu_int32m2_t' (vector of {{[0-9]+}} 'int32_t' values) from a function with incompatible result type 'vint32m2_t' (aka '__rvv_int32m2_t')}}
-
-gnu_int32m2_t to_gnut_from_svint32_t(vint32m2_t x) { return x; } // expected-error-re {{returning 'vint32m2_t' (aka '__rvv_int32m2_t') from a function with incompatible result type 'gnu_int32m2_t' (vector of {{[0-9]+}} 'int32_t' values)}}
+TEST_CAST_VECTOR(int8m2)
+TEST_CAST_VECTOR(int16m2)
+TEST_CAST_VECTOR(int32m2)
+TEST_CAST_VECTOR(int64m2)
+TEST_CAST_VECTOR(uint8m2)
+TEST_CAST_VECTOR(uint16m2)
+TEST_CAST_VECTOR(uint32m2)
+TEST_CAST_VECTOR(uint64m2)
+TEST_CAST_VECTOR(float32m2)
+TEST_CAST_VECTOR(float64m2)
+
+TEST_CAST_VECTOR(int8m4)
+TEST_CAST_VECTOR(int16m4)
+TEST_CAST_VECTOR(int32m4)
+TEST_CAST_VECTOR(int64m4)
+TEST_CAST_VECTOR(uint8m4)
+TEST_CAST_VECTOR(uint16m4)
+TEST_CAST_VECTOR(uint32m4)
+TEST_CAST_VECTOR(uint64m4)
+TEST_CAST_VECTOR(float32m4)
+TEST_CAST_VECTOR(float64m4)
+
+TEST_CAST_VECTOR(int8m8)
+TEST_CAST_VECTOR(int16m8)
+TEST_CAST_VECTOR(int32m8)
+TEST_CAST_VECTOR(int64m8)
+TEST_CAST_VECTOR(uint8m8)
+TEST_CAST_VECTOR(uint16m8)
+TEST_CAST_VECTOR(uint32m8)
+TEST_CAST_VECTOR(uint64m8)
+TEST_CAST_VECTOR(float32m8)
+TEST_CAST_VECTOR(float64m8)
 
 // --------------------------------------------------------------------------//
 // Test the scalable and fixed-length types can be used interchangeably
@@ -296,6 +586,15 @@ gnu_int32m2_t to_gnut_from_svint32_t(vint32m2_t x) { return x; } // expected-err
 vint32m1_t __attribute__((overloadable)) vfunc(vint32m1_t op1, vint32m1_t op2);
 vfloat64m1_t __attribute__((overloadable)) vfunc(vfloat64m1_t op1, vfloat64m1_t op2);
 
+vint32m2_t __attribute__((overloadable)) vfunc(vint32m2_t op1, vint32m2_t op2);
+vfloat64m2_t __attribute__((overloadable)) vfunc(vfloat64m2_t op1, vfloat64m2_t op2);
+
+vint32m4_t __attribute__((overloadable)) vfunc(vint32m4_t op1, vint32m4_t op2);
+vfloat64m4_t __attribute__((overloadable)) vfunc(vfloat64m4_t op1, vfloat64m4_t op2);
+
+vint32m8_t __attribute__((overloadable)) vfunc(vint32m8_t op1, vint32m8_t op2);
+vfloat64m8_t __attribute__((overloadable)) vfunc(vfloat64m8_t op1, vfloat64m8_t op2);
+
 #define TEST_CALL(TYPE)                                              \
   fixed_##TYPE##_t                                                   \
       call_##TYPE##_ff(fixed_##TYPE##_t op1, fixed_##TYPE##_t op2) { \
@@ -313,6 +612,15 @@ vfloat64m1_t __attribute__((overloadable)) vfunc(vfloat64m1_t op1, vfloat64m1_t
 TEST_CALL(int32m1)
 TEST_CALL(float64m1)
 
+TEST_CALL(int32m2)
+TEST_CALL(float64m2)
+
+TEST_CALL(int32m4)
+TEST_CALL(float64m4)
+
+TEST_CALL(int32m8)
+TEST_CALL(float64m8)
+
 // --------------------------------------------------------------------------//
 // Vector initialization
 
@@ -321,12 +629,58 @@ TEST_CALL(float64m1)
 typedef vint32m1_t int32x8 __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen)));
 typedef vfloat64m1_t float64x4 __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen)));
 
+typedef vint32m2_t int32x16 __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 2)));
+typedef vfloat64m2_t float64x8 __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 2)));
+
+typedef vint32m4_t int32x32 __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 4)));
+typedef vfloat64m4_t float64x16 __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 4)));
+
+typedef vint32m8_t int32x64 __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 8)));
+typedef vfloat64m8_t float64x32 __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 8)));
+
 int32x8 foo = {1, 2, 3, 4, 5, 6, 7, 8};
 int32x8 foo2 = {1, 2, 3, 4, 5, 6, 7, 8, 9}; // expected-warning{{excess elements in vector initializer}}
 
 float64x4 bar = {1.0, 2.0, 3.0, 4.0};
 float64x4 bar2 = {1.0, 2.0, 3.0, 4.0, 5.0}; // expected-warning{{excess elements in vector initializer}}
 
+int32x16 foom2 = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+int32x16 foo2m2 = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; // expected-warning{{excess elements in vector initializer}}
+
+float64x8 barm2 = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0};
+float64x8 bar2m2 = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0}; // expected-warning{{excess elements in vector initializer}}
+
+int32x32 foom4 = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+                  17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+                  32};
+int32x32 foo2m4 = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+                   17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+                   32, 33}; // expected-warning{{excess elements in vector initializer}}
+
+float64x16 barm4 = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0,
+                    12.0, 13.0, 14.0, 15.0, 16.0};
+float64x16 bar2m4 = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0,
+                     12.0, 13.0, 14.0, 15.0, 16.0, 17.0}; // expected-warning{{excess elements in vector initializer}}
+
+int32x64 foom8 = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+                  19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
+                  34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
+                  49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+                  64};
+int32x64 foo2m8 = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
+                   18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
+                   33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+                   48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62,
+                   63, 64, 65}; // expected-warning{{excess elements in vector initializer}}
+
+float64x32 barm8 = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0,
+                    12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0,
+                    22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
+                    32.0};
+float64x32 bar2m8 = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0,
+                     12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0,
+                     22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
+                     32.0, 33.0}; // expected-warning{{excess elements in vector initializer}}
 #endif
 
 // --------------------------------------------------------------------------//
@@ -375,6 +729,23 @@ float64x4 bar2 = {1.0, 2.0, 3.0, 4.0, 5.0}; // expected-warning{{excess elements
   TEST_BINARY(TYPE, shr, <<) \
   TEST_UNARY(TYPE, not, ~)
 
+TEST_INT_OPS(fixed_int8mf8_t)
+TEST_INT_OPS(fixed_uint8mf8_t)
+
+TEST_INT_OPS(fixed_int8mf4_t)
+TEST_INT_OPS(fixed_int16mf4_t)
+TEST_INT_OPS(fixed_uint8mf4_t)
+TEST_INT_OPS(fixed_uint16mf4_t)
+
+TEST_INT_OPS(fixed_int8mf2_t)
+TEST_INT_OPS(fixed_int16mf2_t)
+TEST_INT_OPS(fixed_int32mf2_t)
+TEST_INT_OPS(fixed_uint8mf2_t)
+TEST_INT_OPS(fixed_uint16mf2_t)
+TEST_INT_OPS(fixed_uint32mf2_t)
+
+TEST_OPS(fixed_float32mf2_t)
+
 TEST_INT_OPS(fixed_int8m1_t)
 TEST_INT_OPS(fixed_int16m1_t)
 TEST_INT_OPS(fixed_int32m1_t)
@@ -386,3 +757,39 @@ TEST_INT_OPS(fixed_uint64m1_t)
 
 TEST_OPS(fixed_float32m1_t)
 TEST_OPS(fixed_float64m1_t)
+
+TEST_INT_OPS(fixed_int8m2_t)
+TEST_INT_OPS(fixed_int16m2_t)
+TEST_INT_OPS(fixed_int32m2_t)
+TEST_INT_OPS(fixed_int64m2_t)
+TEST_INT_OPS(fixed_uint8m2_t)
+TEST_INT_OPS(fixed_uint16m2_t)
+TEST_INT_OPS(fixed_uint32m2_t)
+TEST_INT_OPS(fixed_uint64m2_t)
+
+TEST_OPS(fixed_float32m2_t)
+TEST_OPS(fixed_float64m2_t)
+
+TEST_INT_OPS(fixed_int8m4_t)
+TEST_INT_OPS(fixed_int16m4_t)
+TEST_INT_OPS(fixed_int32m4_t)
+TEST_INT_OPS(fixed_int64m4_t)
+TEST_INT_OPS(fixed_uint8m4_t)
+TEST_INT_OPS(fixed_uint16m4_t)
+TEST_INT_OPS(fixed_uint32m4_t)
+TEST_INT_OPS(fixed_uint64m4_t)
+
+TEST_OPS(fixed_float32m4_t)
+TEST_OPS(fixed_float64m4_t)
+
+TEST_INT_OPS(fixed_int8m8_t)
+TEST_INT_OPS(fixed_int16m8_t)
+TEST_INT_OPS(fixed_int32m8_t)
+TEST_INT_OPS(fixed_int64m8_t)
+TEST_INT_OPS(fixed_uint8m8_t)
+TEST_INT_OPS(fixed_uint16m8_t)
+TEST_INT_OPS(fixed_uint32m8_t)
+TEST_INT_OPS(fixed_uint64m8_t)
+
+TEST_OPS(fixed_float32m8_t)
+TEST_OPS(fixed_float64m8_t)


        


More information about the cfe-commits mailing list