[clang] [clang] Fix size and alignment of packed sub-byte integer vectors (PR #161796)
via cfe-commits
cfe-commits at lists.llvm.org
Thu Oct 2 23:33:37 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang
Author: Steffen Larsen (steffenlarsen)
<details>
<summary>Changes</summary>
When sub-byte integer types are used in vectors, the data is packed into the first N bits, where N is the bit-size of the element type multiplied by the number of vector elements. However, clang currently reports the size as if each element were one byte wide, because the element type is considered a full byte wide in isolation.
This commit fixes the reported size and alignment of sub-byte vector types so that they match the bit-packed layout they actually use.
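
For illustration, a minimal sketch of the resulting layout (the typedef names are hypothetical; the sizes correspond to the static_asserts added in the SemaCXX test below):

```cpp
// Illustrative only: sub-byte element vectors are now bit-packed.
typedef unsigned _BitInt(4) u4x4 __attribute__((ext_vector_type(4)));
typedef unsigned _BitInt(2) u2x3 __attribute__((ext_vector_type(3)));

// 4 elements * 4 bits = 16 bits -> 2 bytes (previously reported as 4 bytes).
static_assert(sizeof(u4x4) == 2, "");
// 3 elements * 2 bits = 6 bits, rounded up to a whole byte -> 1 byte.
static_assert(sizeof(u2x3) == 1, "");
```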
---
Full diff: https://github.com/llvm/llvm-project/pull/161796.diff
4 Files Affected:
- (modified) clang/lib/AST/ASTContext.cpp (+9-4)
- (modified) clang/test/CodeGenCXX/ext-int.cpp (+15-13)
- (modified) clang/test/CodeGenCXX/matrix-vector-bit-int.cpp (+13-15)
- (modified) clang/test/SemaCXX/ext-int.cpp (+30)
``````````diff
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index 056bfe36b2a0a..451a87c1cfc63 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -2093,10 +2093,15 @@ TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const {
case Type::ExtVector:
case Type::Vector: {
const auto *VT = cast<VectorType>(T);
- TypeInfo EltInfo = getTypeInfo(VT->getElementType());
- Width = VT->isPackedVectorBoolType(*this)
- ? VT->getNumElements()
- : EltInfo.Width * VT->getNumElements();
+ QualType Elt = VT->getElementType();
+ uint64_t EltWidth = [&]() -> uint64_t {
+ if (VT->isPackedVectorBoolType(*this))
+ return 1;
+ if (Elt.getTypePtrOrNull() && Elt.getTypePtr()->isBitIntType())
+ return Elt.getTypePtr()->castAs<BitIntType>()->getNumBits();
+ return getTypeInfo(Elt).Width;
+ }();
+ Width = EltWidth * VT->getNumElements();
// Enforce at least byte size and alignment.
Width = std::max<unsigned>(8, Width);
Align = std::max<unsigned>(8, Width);
diff --git a/clang/test/CodeGenCXX/ext-int.cpp b/clang/test/CodeGenCXX/ext-int.cpp
index a75b3701e36ef..0454363ca7f80 100644
--- a/clang/test/CodeGenCXX/ext-int.cpp
+++ b/clang/test/CodeGenCXX/ext-int.cpp
@@ -573,7 +573,7 @@ void VectorTest(uint16_t4 first, uint16_t4 second) {
typedef unsigned _BitInt(4) uint4_t4 __attribute__((ext_vector_type(4)));
void VectorTest(uint4_t4 first, uint4_t4 second) {
- // LIN64: define{{.*}} void @_Z10VectorTestDv4_DU4_S0_(i32 %{{.+}}, i32 %{{.+}})
+ // LIN64: define{{.*}} void @_Z10VectorTestDv4_DU4_S0_(i16 %{{.+}}, i16 %{{.+}})
// LIN32: define{{.*}} void @_Z10VectorTestDv4_DU4_S0_(<4 x i4> %{{.+}}, <4 x i4> %{{.+}})
// WIN64: define dso_local void @"?VectorTest@@YAXT?$__vector at U?$_UBitInt@$03 at __clang@@$03 at __clang@@0 at Z"(<4 x i4> %{{.+}}, <4 x i4> %{{.+}})
// WIN32: define dso_local void @"?VectorTest@@YAXT?$__vector at U?$_UBitInt@$03 at __clang@@$03 at __clang@@0 at Z"(<4 x i4> inreg %{{.+}}, <4 x i4> inreg %{{.+}})
@@ -585,23 +585,25 @@ void VectorTest(uint4_t4 first, uint4_t4 second) {
typedef unsigned _BitInt(2) uint2_t2 __attribute__((ext_vector_type(2)));
uint2_t2 TestBitIntVector2x2Alloca(uint2_t2 v1, uint2_t2 v2) {
- // LIN64: define dso_local i16 @_Z25TestBitIntVector2x2AllocaDv2_DU2_S0_(i16 %[[V1Coerce:.+]], i16 %[[V2Coerce:.+]])
- // LIN64: %[[RetVal:.+]] = alloca <2 x i2>, align 2
- // LIN64: %[[V1Addr:.+]] = alloca <2 x i2>, align 2
- // LIN64: %[[V2Addr:.+]] = alloca <2 x i2>, align 2
- // LIN64: %[[RetValCoerce:.+]] = alloca i16, align 2
- // LIN64: call void @llvm.memcpy.p0.p0.i64(ptr align 2 %[[RetValCoerce]], ptr align 2 %[[RetVal]], i64 1, i1 false)
- // LIN64: %[[Ret:.+]] = load i16, ptr %[[RetValCoerce]], align 2
- // LIN64: ret i16 %[[Ret]]
+ // LIN64: define dso_local i8 @_Z25TestBitIntVector2x2AllocaDv2_DU2_S0_(i8 %[[V1Coerce:.+]], i8 %[[V2Coerce:.+]])
+ // LIN64: %[[RetVal:.+]] = alloca <2 x i2>, align 1
+ // LIN64: %[[V1Addr:.+]] = alloca <2 x i2>, align 1
+ // LIN64: %[[V2Addr:.+]] = alloca <2 x i2>, align 1
+ // LIN64: %[[V1Val:.+]] = load <2 x i2>, ptr %[[V1Addr]], align 1
+ // LIN64: %[[V2Val:.+]] = load <2 x i2>, ptr %[[V2Addr]], align 1
+ // LIN64: %[[AddVal:.+]] = add <2 x i2> %0, %1
+ // LIN64: store <2 x i2> %[[AddVal]], ptr %[[RetVal]], align 1
+ // LIN64: %[[Ret:.+]] = load i8, ptr %[[RetVal]], align 1
+ // LIN64: ret i8 %[[Ret]]
// LIN32: define dso_local <2 x i2> @_Z25TestBitIntVector2x2AllocaDv2_DU2_S0_(<2 x i2> %{{.+}}, <2 x i2> %{{.+}})
- // LIN32: %[[V1Addr:.+]] = alloca <2 x i2>, align 2
- // LIN32: %[[V2Addr:.+]] = alloca <2 x i2>, align 2
+ // LIN32: %[[V1Addr:.+]] = alloca <2 x i2>, align 1
+ // LIN32: %[[V2Addr:.+]] = alloca <2 x i2>, align 1
// LIN32: ret <2 x i2> %[[Ret:.+]]
// WIN: define dso_local <2 x i2> @"?TestBitIntVector2x2Alloca@@YAT?$__vector at U?$_UBitInt@$01 at __clang@@$01 at __clang@@T12 at 0@Z"(<2 x i2>{{.*}}, <2 x i2>{{.*}})
- // WIN: %[[V1:.+]] = alloca <2 x i2>, align 2
- // WIN: %[[V2:.+]] = alloca <2 x i2>, align 2
+ // WIN: %[[V1:.+]] = alloca <2 x i2>, align 1
+ // WIN: %[[V2:.+]] = alloca <2 x i2>, align 1
// WIN: ret <2 x i2> %[[Ret:.+]]
return v1 + v2;
}
diff --git a/clang/test/CodeGenCXX/matrix-vector-bit-int.cpp b/clang/test/CodeGenCXX/matrix-vector-bit-int.cpp
index 2e7531b334ecb..98b868fcd5bc2 100644
--- a/clang/test/CodeGenCXX/matrix-vector-bit-int.cpp
+++ b/clang/test/CodeGenCXX/matrix-vector-bit-int.cpp
@@ -70,27 +70,25 @@ i512x3 v3(i512x3 a) {
return a + a;
}
-// CHECK-LABEL: define dso_local i32 @_Z2v4Dv3_DB4_(
-// CHECK-SAME: i32 [[A_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-LABEL: define dso_local i16 @_Z2v4Dv3_DB4_(
+// CHECK-SAME: i16 [[A_COERCE:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <3 x i4>, align 4
-// CHECK-NEXT: [[A:%.*]] = alloca <3 x i4>, align 4
-// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <3 x i4>, align 4
-// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca i32, align 4
-// CHECK-NEXT: store i32 [[A_COERCE]], ptr [[A]], align 4
-// CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x i4>, ptr [[A]], align 4
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <3 x i4>, align 2
+// CHECK-NEXT: [[A:%.*]] = alloca <3 x i4>, align 2
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <3 x i4>, align 2
+// CHECK-NEXT: store i16 [[A_COERCE]], ptr [[A]], align 2
+// CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x i4>, ptr [[A]], align 2
// CHECK-NEXT: [[A1:%.*]] = shufflevector <4 x i4> [[LOADVECN]], <4 x i4> poison, <3 x i32> <i32 0, i32 1, i32 2>
// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i4> [[A1]], <3 x i4> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
-// CHECK-NEXT: store <4 x i4> [[EXTRACTVEC]], ptr [[A_ADDR]], align 4
-// CHECK-NEXT: [[LOADVECN2:%.*]] = load <4 x i4>, ptr [[A_ADDR]], align 4
+// CHECK-NEXT: store <4 x i4> [[EXTRACTVEC]], ptr [[A_ADDR]], align 2
+// CHECK-NEXT: [[LOADVECN2:%.*]] = load <4 x i4>, ptr [[A_ADDR]], align 2
// CHECK-NEXT: [[EXTRACTVEC3:%.*]] = shufflevector <4 x i4> [[LOADVECN2]], <4 x i4> poison, <3 x i32> <i32 0, i32 1, i32 2>
-// CHECK-NEXT: [[LOADVECN4:%.*]] = load <4 x i4>, ptr [[A_ADDR]], align 4
+// CHECK-NEXT: [[LOADVECN4:%.*]] = load <4 x i4>, ptr [[A_ADDR]], align 2
// CHECK-NEXT: [[EXTRACTVEC5:%.*]] = shufflevector <4 x i4> [[LOADVECN4]], <4 x i4> poison, <3 x i32> <i32 0, i32 1, i32 2>
// CHECK-NEXT: [[ADD:%.*]] = add <3 x i4> [[EXTRACTVEC3]], [[EXTRACTVEC5]]
-// CHECK-NEXT: store <3 x i4> [[ADD]], ptr [[RETVAL]], align 4
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[RETVAL_COERCE]], ptr align 4 [[RETVAL]], i64 2, i1 false)
-// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[RETVAL_COERCE]], align 4
-// CHECK-NEXT: ret i32 [[TMP0]]
+// CHECK-NEXT: store <3 x i4> [[ADD]], ptr [[RETVAL]], align 2
+// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[RETVAL]], align 2
+// CHECK-NEXT: ret i16 [[TMP0]]
//
i4x3 v4(i4x3 a) {
return a + a;
diff --git a/clang/test/SemaCXX/ext-int.cpp b/clang/test/SemaCXX/ext-int.cpp
index 5c566dafed931..d3b72761402d0 100644
--- a/clang/test/SemaCXX/ext-int.cpp
+++ b/clang/test/SemaCXX/ext-int.cpp
@@ -293,3 +293,33 @@ void FromPaper1() {
void FromPaper2(_BitInt(8) a1, _BitInt(24) a2) {
static_assert(is_same<decltype(a1 * (_BitInt(32))a2), _BitInt(32)>::value, "");
}
+
+// Check sub-byte integer vector size and alignment, expecting packing.
+template <int Bits, int N>
+using packed_vec_t = _BitInt(Bits) __attribute__((ext_vector_type(N)));
+void SubByteVecPacking() {
+ static_assert(sizeof(packed_vec_t<2, 2>) == 1);
+ static_assert(sizeof(packed_vec_t<2, 3>) == 1);
+ static_assert(sizeof(packed_vec_t<2, 4>) == 1);
+ static_assert(sizeof(packed_vec_t<2, 8>) == 2);
+ static_assert(sizeof(packed_vec_t<2, 16>) == 4);
+ static_assert(sizeof(packed_vec_t<2, 32>) == 8);
+ static_assert(sizeof(packed_vec_t<4, 2>) == 1);
+ static_assert(sizeof(packed_vec_t<4, 4>) == 2);
+ static_assert(sizeof(packed_vec_t<4, 8>) == 4);
+ static_assert(sizeof(packed_vec_t<4, 16>) == 8);
+ static_assert(sizeof(packed_vec_t<4, 32>) == 16);
+
+ static_assert(alignof(packed_vec_t<2, 2>) == 1);
+ static_assert(alignof(packed_vec_t<2, 3>) == 1);
+ static_assert(alignof(packed_vec_t<2, 4>) == 1);
+ static_assert(alignof(packed_vec_t<2, 8>) == 2);
+ static_assert(alignof(packed_vec_t<2, 16>) == 4);
+ static_assert(alignof(packed_vec_t<2, 32>) == 8);
+ static_assert(alignof(packed_vec_t<4, 2>) == 1);
+ static_assert(alignof(packed_vec_t<4, 3>) == 2);
+ static_assert(alignof(packed_vec_t<4, 4>) == 2);
+ static_assert(alignof(packed_vec_t<4, 8>) == 4);
+ static_assert(alignof(packed_vec_t<4, 16>) == 8);
+ static_assert(alignof(packed_vec_t<4, 32>) == 16);
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/161796