[clang] [Clang] Freeze padded vectors before storing. (PR #164821)
Florian Hahn via cfe-commits
cfe-commits at lists.llvm.org
Mon Oct 27 20:20:59 PDT 2025
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/164821
>From 982b2154bdfbd56f74f38822c67aa6a8ee9ad741 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 23 Oct 2025 14:01:19 +0100
Subject: [PATCH 1/4] [Clang] Add test
---
.../CodeGen/AArch64/ext-vector-coercion.c | 42 +++++++++++++++++++
1 file changed, 42 insertions(+)
create mode 100644 clang/test/CodeGen/AArch64/ext-vector-coercion.c
diff --git a/clang/test/CodeGen/AArch64/ext-vector-coercion.c b/clang/test/CodeGen/AArch64/ext-vector-coercion.c
new file mode 100644
index 0000000000000..0b24264e9b077
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/ext-vector-coercion.c
@@ -0,0 +1,42 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
+// RUN: %clang_cc1 -fenable-matrix -triple arm64-apple-macosx %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s
+
+typedef float float3 __attribute__((ext_vector_type(3)));
+struct Vec3 {
+ union {
+ struct {
+ float x;
+ float y;
+ float z;
+ };
+ float vec __attribute__((ext_vector_type(3)));
+ };
+};
+
+// CHECK-LABEL: define i128 @add(
+// CHECK-SAME: i128 [[A_COERCE:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_VEC3:%.*]], align 16
+// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_VEC3]], align 16
+// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_VEC3]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT: store i128 [[A_COERCE]], ptr [[COERCE_DIVE]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_VEC3]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x float>, ptr [[TMP0]], align 16
+// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <4 x float> [[LOADVECN]], <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
+// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_VEC3]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT: [[LOADVECN1:%.*]] = load <4 x float>, ptr [[TMP1]], align 16
+// CHECK-NEXT: [[EXTRACTVEC2:%.*]] = shufflevector <4 x float> [[LOADVECN1]], <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
+// CHECK-NEXT: [[ADD:%.*]] = fadd <3 x float> [[EXTRACTVEC]], [[EXTRACTVEC2]]
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_VEC3]], ptr [[RETVAL]], i32 0, i32 0
+// CHECK-NEXT: [[EXTRACTVEC3:%.*]] = shufflevector <3 x float> [[ADD]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// CHECK-NEXT: store <4 x float> [[EXTRACTVEC3]], ptr [[TMP2]], align 16
+// CHECK-NEXT: [[COERCE_DIVE4:%.*]] = getelementptr inbounds nuw [[STRUCT_VEC3]], ptr [[RETVAL]], i32 0, i32 0
+// CHECK-NEXT: [[TMP3:%.*]] = load i128, ptr [[COERCE_DIVE4]], align 16
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+struct Vec3 add(struct Vec3 a) {
+ struct Vec3 res;
+ res.vec = a.vec + a.vec;
+ return res;
+}
+
>From e0b7676236641ceff31351d0aec10aea70b0989a Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 23 Oct 2025 14:03:30 +0100
Subject: [PATCH 2/4] [Clang] Freeze padded vectors before storing.
Currently Clang usually leaves padding bits uninitialized, which means
they are undef at the moment.
When expanding stores of vector types to include padding, the padding
lanes will be poison, hence the padding bits will be poison.
This interacts badly with coercion of arguments and return values, where
3 x float vectors will be loaded as i128 integer; poisoning the padding
bits will make the whole value poison.
---
clang/lib/CodeGen/CGExpr.cpp | 3 +++
clang/test/CodeGen/AArch64/ext-vector-coercion.c | 7 ++++---
2 files changed, 7 insertions(+), 3 deletions(-)
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 301d5770cf78f..a581cf821092f 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -2300,6 +2300,9 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr,
SmallVector<int, 16> Mask(NewVecTy->getNumElements(), -1);
std::iota(Mask.begin(), Mask.begin() + VecTy->getNumElements(), 0);
Value = Builder.CreateShuffleVector(Value, Mask, "extractVec");
+ // The extra lanes will be poison. Freeze the whole vector to make sure
+ // the padding memory is not poisoned, which may break coercion.
+ Value = Builder.CreateFreeze(Value);
SrcTy = NewVecTy;
}
if (Addr.getElementType() != SrcTy)
diff --git a/clang/test/CodeGen/AArch64/ext-vector-coercion.c b/clang/test/CodeGen/AArch64/ext-vector-coercion.c
index 0b24264e9b077..044a3457b8fa9 100644
--- a/clang/test/CodeGen/AArch64/ext-vector-coercion.c
+++ b/clang/test/CodeGen/AArch64/ext-vector-coercion.c
@@ -29,10 +29,11 @@ struct Vec3 {
// CHECK-NEXT: [[ADD:%.*]] = fadd <3 x float> [[EXTRACTVEC]], [[EXTRACTVEC2]]
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_VEC3]], ptr [[RETVAL]], i32 0, i32 0
// CHECK-NEXT: [[EXTRACTVEC3:%.*]] = shufflevector <3 x float> [[ADD]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
-// CHECK-NEXT: store <4 x float> [[EXTRACTVEC3]], ptr [[TMP2]], align 16
+// CHECK-NEXT: [[TMP3:%.*]] = freeze <4 x float> [[EXTRACTVEC3]]
+// CHECK-NEXT: store <4 x float> [[TMP3]], ptr [[TMP2]], align 16
// CHECK-NEXT: [[COERCE_DIVE4:%.*]] = getelementptr inbounds nuw [[STRUCT_VEC3]], ptr [[RETVAL]], i32 0, i32 0
-// CHECK-NEXT: [[TMP3:%.*]] = load i128, ptr [[COERCE_DIVE4]], align 16
-// CHECK-NEXT: ret i128 [[TMP3]]
+// CHECK-NEXT: [[TMP4:%.*]] = load i128, ptr [[COERCE_DIVE4]], align 16
+// CHECK-NEXT: ret i128 [[TMP4]]
//
struct Vec3 add(struct Vec3 a) {
struct Vec3 res;
>From 8ef57e7d8ba3e3b8ad0637cf2b7aaf01690d6957 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 23 Oct 2025 19:23:07 +0100
Subject: [PATCH 3/4] !fixup use undef instead of poison for padding.
---
clang/lib/CodeGen/CGExpr.cpp | 8 ++++----
clang/test/CodeGen/AArch64/ext-vector-coercion.c | 9 ++++-----
clang/test/CodeGenCXX/matrix-vector-bit-int.cpp | 8 ++++----
clang/test/CodeGenOpenCL/preserve_vec3.cl | 8 ++++----
4 files changed, 16 insertions(+), 17 deletions(-)
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index a581cf821092f..fbec449c9f608 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -2299,10 +2299,10 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr,
VecTy != NewVecTy) {
SmallVector<int, 16> Mask(NewVecTy->getNumElements(), -1);
std::iota(Mask.begin(), Mask.begin() + VecTy->getNumElements(), 0);
- Value = Builder.CreateShuffleVector(Value, Mask, "extractVec");
- // The extra lanes will be poison. Freeze the whole vector to make sure
- // the padding memory is not poisoned, which may break coercion.
- Value = Builder.CreateFreeze(Value);
+ // Use undef instead of poison for the padding lanes, to make sure no
+ // padding bits are poisoned, which may break coercion.
+ Value = Builder.CreateShuffleVector(Value, llvm::UndefValue::get(VecTy),
+ Mask, "extractVec");
SrcTy = NewVecTy;
}
if (Addr.getElementType() != SrcTy)
diff --git a/clang/test/CodeGen/AArch64/ext-vector-coercion.c b/clang/test/CodeGen/AArch64/ext-vector-coercion.c
index 044a3457b8fa9..f9baff50fd634 100644
--- a/clang/test/CodeGen/AArch64/ext-vector-coercion.c
+++ b/clang/test/CodeGen/AArch64/ext-vector-coercion.c
@@ -28,12 +28,11 @@ struct Vec3 {
// CHECK-NEXT: [[EXTRACTVEC2:%.*]] = shufflevector <4 x float> [[LOADVECN1]], <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
// CHECK-NEXT: [[ADD:%.*]] = fadd <3 x float> [[EXTRACTVEC]], [[EXTRACTVEC2]]
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_VEC3]], ptr [[RETVAL]], i32 0, i32 0
-// CHECK-NEXT: [[EXTRACTVEC3:%.*]] = shufflevector <3 x float> [[ADD]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
-// CHECK-NEXT: [[TMP3:%.*]] = freeze <4 x float> [[EXTRACTVEC3]]
-// CHECK-NEXT: store <4 x float> [[TMP3]], ptr [[TMP2]], align 16
+// CHECK-NEXT: [[EXTRACTVEC3:%.*]] = shufflevector <3 x float> [[ADD]], <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// CHECK-NEXT: store <4 x float> [[EXTRACTVEC3]], ptr [[TMP2]], align 16
// CHECK-NEXT: [[COERCE_DIVE4:%.*]] = getelementptr inbounds nuw [[STRUCT_VEC3]], ptr [[RETVAL]], i32 0, i32 0
-// CHECK-NEXT: [[TMP4:%.*]] = load i128, ptr [[COERCE_DIVE4]], align 16
-// CHECK-NEXT: ret i128 [[TMP4]]
+// CHECK-NEXT: [[TMP3:%.*]] = load i128, ptr [[COERCE_DIVE4]], align 16
+// CHECK-NEXT: ret i128 [[TMP3]]
//
struct Vec3 add(struct Vec3 a) {
struct Vec3 res;
diff --git a/clang/test/CodeGenCXX/matrix-vector-bit-int.cpp b/clang/test/CodeGenCXX/matrix-vector-bit-int.cpp
index 2e7531b334ecb..0decf75b80e6a 100644
--- a/clang/test/CodeGenCXX/matrix-vector-bit-int.cpp
+++ b/clang/test/CodeGenCXX/matrix-vector-bit-int.cpp
@@ -19,7 +19,7 @@ using i4x3x3 = _BitInt(4) __attribute__((matrix_type(3, 3)));
// CHECK-NEXT: store i32 [[A_COERCE]], ptr [[A]], align 4
// CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x i8>, ptr [[A]], align 4
// CHECK-NEXT: [[A1:%.*]] = shufflevector <4 x i8> [[LOADVECN]], <4 x i8> poison, <3 x i32> <i32 0, i32 1, i32 2>
-// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i8> [[A1]], <3 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i8> [[A1]], <3 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
// CHECK-NEXT: store <4 x i8> [[EXTRACTVEC]], ptr [[A_ADDR]], align 4
// CHECK-NEXT: [[LOADVECN2:%.*]] = load <4 x i8>, ptr [[A_ADDR]], align 4
// CHECK-NEXT: [[EXTRACTVEC3:%.*]] = shufflevector <4 x i8> [[LOADVECN2]], <4 x i8> poison, <3 x i32> <i32 0, i32 1, i32 2>
@@ -38,7 +38,7 @@ i8x3 v1(i8x3 a) {
// CHECK-SAME: <3 x i32> noundef [[A:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <3 x i32>, align 16
-// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i32> [[A]], <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i32> [[A]], <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
// CHECK-NEXT: store <4 x i32> [[EXTRACTVEC]], ptr [[A_ADDR]], align 16
// CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x i32>, ptr [[A_ADDR]], align 16
// CHECK-NEXT: [[EXTRACTVEC1:%.*]] = shufflevector <4 x i32> [[LOADVECN]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
@@ -57,7 +57,7 @@ i32x3 v2(i32x3 a) {
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <3 x i512>, align 256
// CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x i512>, ptr [[TMP0]], align 256
// CHECK-NEXT: [[A:%.*]] = shufflevector <4 x i512> [[LOADVECN]], <4 x i512> poison, <3 x i32> <i32 0, i32 1, i32 2>
-// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i512> [[A]], <3 x i512> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i512> [[A]], <3 x i512> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
// CHECK-NEXT: store <4 x i512> [[EXTRACTVEC]], ptr [[A_ADDR]], align 256
// CHECK-NEXT: [[LOADVECN1:%.*]] = load <4 x i512>, ptr [[A_ADDR]], align 256
// CHECK-NEXT: [[EXTRACTVEC2:%.*]] = shufflevector <4 x i512> [[LOADVECN1]], <4 x i512> poison, <3 x i32> <i32 0, i32 1, i32 2>
@@ -80,7 +80,7 @@ i512x3 v3(i512x3 a) {
// CHECK-NEXT: store i32 [[A_COERCE]], ptr [[A]], align 4
// CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x i4>, ptr [[A]], align 4
// CHECK-NEXT: [[A1:%.*]] = shufflevector <4 x i4> [[LOADVECN]], <4 x i4> poison, <3 x i32> <i32 0, i32 1, i32 2>
-// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i4> [[A1]], <3 x i4> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i4> [[A1]], <3 x i4> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
// CHECK-NEXT: store <4 x i4> [[EXTRACTVEC]], ptr [[A_ADDR]], align 4
// CHECK-NEXT: [[LOADVECN2:%.*]] = load <4 x i4>, ptr [[A_ADDR]], align 4
// CHECK-NEXT: [[EXTRACTVEC3:%.*]] = shufflevector <4 x i4> [[LOADVECN2]], <4 x i4> poison, <3 x i32> <i32 0, i32 1, i32 2>
diff --git a/clang/test/CodeGenOpenCL/preserve_vec3.cl b/clang/test/CodeGenOpenCL/preserve_vec3.cl
index e76aa81f918cb..66d955b4af443 100644
--- a/clang/test/CodeGenOpenCL/preserve_vec3.cl
+++ b/clang/test/CodeGenOpenCL/preserve_vec3.cl
@@ -71,8 +71,8 @@ void kernel char8_to_short3(global short3 *a, global char8 *b) {
// CHECK-LABEL: define dso_local spir_func void @from_char3(
// CHECK-SAME: <3 x i8> noundef [[A:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i8> [[A]], <3 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
-// CHECK-NEXT: store <4 x i8> [[EXTRACTVEC]], ptr addrspace(1) [[OUT]], align 4, !tbaa [[INT_TBAA3:![0-9]+]]
+// CHECK-NEXT: [[TMP0:%.*]] = shufflevector <3 x i8> [[A]], <3 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// CHECK-NEXT: store <4 x i8> [[TMP0]], ptr addrspace(1) [[OUT]], align 4, !tbaa [[INT_TBAA3:![0-9]+]]
// CHECK-NEXT: ret void
//
void from_char3(char3 a, global int *out) {
@@ -82,8 +82,8 @@ void from_char3(char3 a, global int *out) {
// CHECK-LABEL: define dso_local spir_func void @from_short3(
// CHECK-SAME: <3 x i16> noundef [[A:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 8)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i16> [[A]], <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
-// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC]], ptr addrspace(1) [[OUT]], align 8, !tbaa [[LONG_TBAA19:![0-9]+]]
+// CHECK-NEXT: [[TMP0:%.*]] = shufflevector <3 x i16> [[A]], <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// CHECK-NEXT: store <4 x i16> [[TMP0]], ptr addrspace(1) [[OUT]], align 8, !tbaa [[LONG_TBAA19:![0-9]+]]
// CHECK-NEXT: ret void
//
void from_short3(short3 a, global long *out) {
>From dd821bbf446b2b23bbcd4201c8a2d06179cd0abd Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Tue, 28 Oct 2025 03:18:10 +0000
Subject: [PATCH 4/4] !fixup don't use poison mask
---
clang/lib/CodeGen/CGExpr.cpp | 3 ++-
clang/test/CodeGen/AArch64/ext-vector-coercion.c | 2 +-
clang/test/CodeGenCXX/matrix-vector-bit-int.cpp | 8 ++++----
clang/test/CodeGenOpenCL/preserve_vec3.cl | 12 +++++++-----
4 files changed, 14 insertions(+), 11 deletions(-)
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index fbec449c9f608..01f2161f27555 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -2297,7 +2297,8 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr,
CGM.getABIInfo().getOptimalVectorMemoryType(VecTy, getLangOpts());
if (!ClangVecTy->isPackedVectorBoolType(getContext()) &&
VecTy != NewVecTy) {
- SmallVector<int, 16> Mask(NewVecTy->getNumElements(), -1);
+ SmallVector<int, 16> Mask(NewVecTy->getNumElements(),
+ VecTy->getNumElements());
std::iota(Mask.begin(), Mask.begin() + VecTy->getNumElements(), 0);
// Use undef instead of poison for the padding lanes, to make sure no
// padding bits are poisoned, which may break coercion.
diff --git a/clang/test/CodeGen/AArch64/ext-vector-coercion.c b/clang/test/CodeGen/AArch64/ext-vector-coercion.c
index f9baff50fd634..354980afe06d7 100644
--- a/clang/test/CodeGen/AArch64/ext-vector-coercion.c
+++ b/clang/test/CodeGen/AArch64/ext-vector-coercion.c
@@ -28,7 +28,7 @@ struct Vec3 {
// CHECK-NEXT: [[EXTRACTVEC2:%.*]] = shufflevector <4 x float> [[LOADVECN1]], <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
// CHECK-NEXT: [[ADD:%.*]] = fadd <3 x float> [[EXTRACTVEC]], [[EXTRACTVEC2]]
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_VEC3]], ptr [[RETVAL]], i32 0, i32 0
-// CHECK-NEXT: [[EXTRACTVEC3:%.*]] = shufflevector <3 x float> [[ADD]], <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// CHECK-NEXT: [[EXTRACTVEC3:%.*]] = shufflevector <3 x float> [[ADD]], <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK-NEXT: store <4 x float> [[EXTRACTVEC3]], ptr [[TMP2]], align 16
// CHECK-NEXT: [[COERCE_DIVE4:%.*]] = getelementptr inbounds nuw [[STRUCT_VEC3]], ptr [[RETVAL]], i32 0, i32 0
// CHECK-NEXT: [[TMP3:%.*]] = load i128, ptr [[COERCE_DIVE4]], align 16
diff --git a/clang/test/CodeGenCXX/matrix-vector-bit-int.cpp b/clang/test/CodeGenCXX/matrix-vector-bit-int.cpp
index 0decf75b80e6a..4be1cb3067c2f 100644
--- a/clang/test/CodeGenCXX/matrix-vector-bit-int.cpp
+++ b/clang/test/CodeGenCXX/matrix-vector-bit-int.cpp
@@ -19,7 +19,7 @@ using i4x3x3 = _BitInt(4) __attribute__((matrix_type(3, 3)));
// CHECK-NEXT: store i32 [[A_COERCE]], ptr [[A]], align 4
// CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x i8>, ptr [[A]], align 4
// CHECK-NEXT: [[A1:%.*]] = shufflevector <4 x i8> [[LOADVECN]], <4 x i8> poison, <3 x i32> <i32 0, i32 1, i32 2>
-// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i8> [[A1]], <3 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i8> [[A1]], <3 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK-NEXT: store <4 x i8> [[EXTRACTVEC]], ptr [[A_ADDR]], align 4
// CHECK-NEXT: [[LOADVECN2:%.*]] = load <4 x i8>, ptr [[A_ADDR]], align 4
// CHECK-NEXT: [[EXTRACTVEC3:%.*]] = shufflevector <4 x i8> [[LOADVECN2]], <4 x i8> poison, <3 x i32> <i32 0, i32 1, i32 2>
@@ -38,7 +38,7 @@ i8x3 v1(i8x3 a) {
// CHECK-SAME: <3 x i32> noundef [[A:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <3 x i32>, align 16
-// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i32> [[A]], <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i32> [[A]], <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK-NEXT: store <4 x i32> [[EXTRACTVEC]], ptr [[A_ADDR]], align 16
// CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x i32>, ptr [[A_ADDR]], align 16
// CHECK-NEXT: [[EXTRACTVEC1:%.*]] = shufflevector <4 x i32> [[LOADVECN]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
@@ -57,7 +57,7 @@ i32x3 v2(i32x3 a) {
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <3 x i512>, align 256
// CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x i512>, ptr [[TMP0]], align 256
// CHECK-NEXT: [[A:%.*]] = shufflevector <4 x i512> [[LOADVECN]], <4 x i512> poison, <3 x i32> <i32 0, i32 1, i32 2>
-// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i512> [[A]], <3 x i512> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i512> [[A]], <3 x i512> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK-NEXT: store <4 x i512> [[EXTRACTVEC]], ptr [[A_ADDR]], align 256
// CHECK-NEXT: [[LOADVECN1:%.*]] = load <4 x i512>, ptr [[A_ADDR]], align 256
// CHECK-NEXT: [[EXTRACTVEC2:%.*]] = shufflevector <4 x i512> [[LOADVECN1]], <4 x i512> poison, <3 x i32> <i32 0, i32 1, i32 2>
@@ -80,7 +80,7 @@ i512x3 v3(i512x3 a) {
// CHECK-NEXT: store i32 [[A_COERCE]], ptr [[A]], align 4
// CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x i4>, ptr [[A]], align 4
// CHECK-NEXT: [[A1:%.*]] = shufflevector <4 x i4> [[LOADVECN]], <4 x i4> poison, <3 x i32> <i32 0, i32 1, i32 2>
-// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i4> [[A1]], <3 x i4> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i4> [[A1]], <3 x i4> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK-NEXT: store <4 x i4> [[EXTRACTVEC]], ptr [[A_ADDR]], align 4
// CHECK-NEXT: [[LOADVECN2:%.*]] = load <4 x i4>, ptr [[A_ADDR]], align 4
// CHECK-NEXT: [[EXTRACTVEC3:%.*]] = shufflevector <4 x i4> [[LOADVECN2]], <4 x i4> poison, <3 x i32> <i32 0, i32 1, i32 2>
diff --git a/clang/test/CodeGenOpenCL/preserve_vec3.cl b/clang/test/CodeGenOpenCL/preserve_vec3.cl
index 66d955b4af443..0017169b8cf48 100644
--- a/clang/test/CodeGenOpenCL/preserve_vec3.cl
+++ b/clang/test/CodeGenOpenCL/preserve_vec3.cl
@@ -12,7 +12,7 @@ typedef float float4 __attribute__((ext_vector_type(4)));
// CHECK-SAME: ptr addrspace(1) noundef readonly align 16 captures(none) [[A:%.*]], ptr addrspace(1) noundef writeonly align 16 captures(none) initializes((0, 16)) [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META7:![0-9]+]] !kernel_arg_access_qual [[META8:![0-9]+]] !kernel_arg_type [[META9:![0-9]+]] !kernel_arg_base_type [[META10:![0-9]+]] !kernel_arg_type_qual [[META11:![0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = load <3 x float>, ptr addrspace(1) [[A]], align 16
-// CHECK-NEXT: [[EXTRACTVEC1_I:%.*]] = shufflevector <3 x float> [[TMP0]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// CHECK-NEXT: [[EXTRACTVEC1_I:%.*]] = shufflevector <3 x float> [[TMP0]], <3 x float> <float undef, float poison, float poison>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK-NEXT: store <4 x float> [[EXTRACTVEC1_I]], ptr addrspace(1) [[B]], align 16, !tbaa [[CHAR_TBAA12:![0-9]+]]
// CHECK-NEXT: ret void
//
@@ -24,7 +24,7 @@ void kernel foo(global float3 *a, global float3 *b) {
// CHECK-SAME: ptr addrspace(1) noundef writeonly align 16 captures(none) initializes((0, 16)) [[A:%.*]], ptr addrspace(1) noundef readonly align 16 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META7]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META13:![0-9]+]] !kernel_arg_base_type [[META14:![0-9]+]] !kernel_arg_type_qual [[META11]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = load <3 x float>, ptr addrspace(1) [[B]], align 16, !tbaa [[CHAR_TBAA12]]
-// CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <3 x float> [[TMP0]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <3 x float> [[TMP0]], <3 x float> <float undef, float poison, float poison>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK-NEXT: store <4 x float> [[EXTRACTVEC_I]], ptr addrspace(1) [[A]], align 16, !tbaa [[CHAR_TBAA12]]
// CHECK-NEXT: ret void
//
@@ -60,7 +60,7 @@ void kernel float3_to_double2(global float3 *a, global double2 *b) {
// CHECK-SAME: ptr addrspace(1) noundef writeonly align 8 captures(none) initializes((0, 8)) [[A:%.*]], ptr addrspace(1) noundef readonly align 8 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META7]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META18:![0-9]+]] !kernel_arg_type_qual [[META11]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = load <3 x i16>, ptr addrspace(1) [[B]], align 8, !tbaa [[CHAR_TBAA12]]
-// CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <3 x i16> [[TMP0]], <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <3 x i16> [[TMP0]], <3 x i16> <i16 undef, i16 poison, i16 poison>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I]], ptr addrspace(1) [[A]], align 8, !tbaa [[CHAR_TBAA12]]
// CHECK-NEXT: ret void
//
@@ -94,7 +94,8 @@ void from_short3(short3 a, global long *out) {
// CHECK-SAME: i32 noundef [[A:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i32 [[A]] to <4 x i8>
-// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// CHECK-NEXT: [[ASTYPE:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> poison, <3 x i32> <i32 0, i32 1, i32 2>
+// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i8> [[ASTYPE]], <3 x i8> <i8 undef, i8 poison, i8 poison>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK-NEXT: store <4 x i8> [[EXTRACTVEC]], ptr addrspace(1) [[OUT]], align 4, !tbaa [[CHAR_TBAA12]]
// CHECK-NEXT: ret void
//
@@ -106,7 +107,8 @@ void scalar_to_char3(int a, global char3 *out) {
// CHECK-SAME: i64 noundef [[A:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 8)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[A]] to <4 x i16>
-// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// CHECK-NEXT: [[ASTYPE:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
+// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i16> [[ASTYPE]], <3 x i16> <i16 undef, i16 poison, i16 poison>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC]], ptr addrspace(1) [[OUT]], align 8, !tbaa [[CHAR_TBAA12]]
// CHECK-NEXT: ret void
//
More information about the cfe-commits
mailing list