[clang] 3afbf89 - [clang][AArch64][SVE] Handle PRValue under VLAT <-> VLST cast
Jun Ma via cfe-commits
cfe-commits at lists.llvm.org
Wed Jun 30 19:10:59 PDT 2021
Author: Jun Ma
Date: 2021-07-01T10:09:47+08:00
New Revision: 3afbf898044aa5839ed75273fa38a897abe9d3d4
URL: https://github.com/llvm/llvm-project/commit/3afbf898044aa5839ed75273fa38a897abe9d3d4
DIFF: https://github.com/llvm/llvm-project/commit/3afbf898044aa5839ed75273fa38a897abe9d3d4.diff
LOG: [clang][AArch64][SVE] Handle PRValue under VLAT <-> VLST cast
This change fixes a crash where a PRValue operand of a VLAT <-> VLST
cast could not be handled by EmitLValue. The cast now always spills the
source value to a temporary alloca, bitcasts the address, and reloads
it, instead of special-casing call expressions and emitting an lvalue
for every other operand.
Differential Revision: https://reviews.llvm.org/D105097
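For context, a minimal reproducer, sketched from the test_cast() test added
below in clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c. The typedefs,
globals, and the -msve-vector-bits=512 compile flag follow the usual pattern
of these tests and are assumptions, not part of this diff; the key point is
that the operand of the fixed <-> scalable conversion is a prvalue (xx + yy),
not a call expression.

#include <arm_sve.h>

#define N 512 /* assumed; the in-tree tests use __ARM_FEATURE_SVE_BITS */
typedef svint32_t fixed_int32_t __attribute__((arm_sve_vector_bits(N)));
typedef svbool_t fixed_bool_t __attribute__((arm_sve_vector_bits(N)));

fixed_bool_t global_pred;
fixed_int32_t global_vec;

fixed_int32_t test_cast(svbool_t pred, svint32_t vec) {
  fixed_int32_t xx = {1, 2, 3, 4};
  fixed_int32_t yy = {2, 5, 4, 6};
  /* 'xx + yy' is a fixed-length (VLST) prvalue converted to the scalable
     (VLAT) argument type; before this patch only CallExpr prvalues were
     handled, so this fell through to EmitLValue and crashed. */
  svbool_t pg = svand_z(pred, global_pred, xx + yy);
  return svadd_m(pg, global_vec, vec);
}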
Added:
Modified:
clang/lib/CodeGen/CGExprScalar.cpp
clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c
clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c
clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c
Removed:
################################################################################
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index d299a1d38e0eb..92015c8efda0a 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -2095,24 +2095,11 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
isa<llvm::ScalableVectorType>(DstTy)) ||
(isa<llvm::ScalableVectorType>(SrcTy) &&
isa<llvm::FixedVectorType>(DstTy))) {
- if (const CallExpr *CE = dyn_cast<CallExpr>(E)) {
- // Call expressions can't have a scalar return unless the return type
- // is a reference type so an lvalue can't be emitted. Create a temp
- // alloca to store the call, bitcast the address then load.
- QualType RetTy = CE->getCallReturnType(CGF.getContext());
- Address Addr =
- CGF.CreateDefaultAlignTempAlloca(SrcTy, "saved-call-rvalue");
- LValue LV = CGF.MakeAddrLValue(Addr, RetTy);
- CGF.EmitStoreOfScalar(Src, LV);
- Addr = Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(DestTy),
- "castFixedSve");
- LValue DestLV = CGF.MakeAddrLValue(Addr, DestTy);
- DestLV.setTBAAInfo(TBAAAccessInfo::getMayAliasInfo());
- return EmitLoadOfLValue(DestLV, CE->getExprLoc());
- }
-
- Address Addr = EmitLValue(E).getAddress(CGF);
- Addr = Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(DestTy));
+ Address Addr = CGF.CreateDefaultAlignTempAlloca(SrcTy, "saved-value");
+ LValue LV = CGF.MakeAddrLValue(Addr, E->getType());
+ CGF.EmitStoreOfScalar(Src, LV);
+ Addr = Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(DestTy),
+ "castFixedSve");
LValue DestLV = CGF.MakeAddrLValue(Addr, DestTy);
DestLV.setTBAAInfo(TBAAAccessInfo::getMayAliasInfo());
return EmitLoadOfLValue(DestLV, CE->getExprLoc());
diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c
index 73ac3f49cf3bd..278cc930610bd 100644
--- a/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c
+++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c
@@ -30,21 +30,21 @@ DEFINE_STRUCT(bool)
// CHECK-128-LABEL: @read_int64(
// CHECK-128-NEXT: entry:
// CHECK-128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_INT64:%.*]], %struct.struct_int64* [[S:%.*]], i64 0, i32 1, i64 0
-// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16, [[TBAA6:!tbaa !.*]]
+// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16, !tbaa [[TBAA6:![0-9]+]]
// CHECK-128-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> undef, <2 x i64> [[TMP0]], i64 0)
// CHECK-128-NEXT: ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
//
// CHECK-256-LABEL: @read_int64(
// CHECK-256-NEXT: entry:
// CHECK-256-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_INT64:%.*]], %struct.struct_int64* [[S:%.*]], i64 0, i32 1, i64 0
-// CHECK-256-NEXT: [[TMP0:%.*]] = load <4 x i64>, <4 x i64>* [[ARRAYIDX]], align 16, [[TBAA6:!tbaa !.*]]
+// CHECK-256-NEXT: [[TMP0:%.*]] = load <4 x i64>, <4 x i64>* [[ARRAYIDX]], align 16, !tbaa [[TBAA6:![0-9]+]]
// CHECK-256-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> undef, <4 x i64> [[TMP0]], i64 0)
// CHECK-256-NEXT: ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
//
// CHECK-512-LABEL: @read_int64(
// CHECK-512-NEXT: entry:
// CHECK-512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_INT64:%.*]], %struct.struct_int64* [[S:%.*]], i64 0, i32 1, i64 0
-// CHECK-512-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[ARRAYIDX]], align 16, [[TBAA6:!tbaa !.*]]
+// CHECK-512-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[ARRAYIDX]], align 16, !tbaa [[TBAA6:![0-9]+]]
// CHECK-512-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[TMP0]], i64 0)
// CHECK-512-NEXT: ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
//
@@ -56,21 +56,21 @@ svint64_t read_int64(struct struct_int64 *s) {
// CHECK-128-NEXT: entry:
// CHECK-128-NEXT: [[CASTFIXEDSVE:%.*]] = call <2 x i64> @llvm.experimental.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64> [[X:%.*]], i64 0)
// CHECK-128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_INT64:%.*]], %struct.struct_int64* [[S:%.*]], i64 0, i32 1, i64 0
-// CHECK-128-NEXT: store <2 x i64> [[CASTFIXEDSVE]], <2 x i64>* [[ARRAYIDX]], align 16, [[TBAA6]]
+// CHECK-128-NEXT: store <2 x i64> [[CASTFIXEDSVE]], <2 x i64>* [[ARRAYIDX]], align 16, !tbaa [[TBAA6]]
// CHECK-128-NEXT: ret void
//
// CHECK-256-LABEL: @write_int64(
// CHECK-256-NEXT: entry:
// CHECK-256-NEXT: [[CASTFIXEDSVE:%.*]] = call <4 x i64> @llvm.experimental.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[X:%.*]], i64 0)
// CHECK-256-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_INT64:%.*]], %struct.struct_int64* [[S:%.*]], i64 0, i32 1, i64 0
-// CHECK-256-NEXT: store <4 x i64> [[CASTFIXEDSVE]], <4 x i64>* [[ARRAYIDX]], align 16, [[TBAA6]]
+// CHECK-256-NEXT: store <4 x i64> [[CASTFIXEDSVE]], <4 x i64>* [[ARRAYIDX]], align 16, !tbaa [[TBAA6]]
// CHECK-256-NEXT: ret void
//
// CHECK-512-LABEL: @write_int64(
// CHECK-512-NEXT: entry:
// CHECK-512-NEXT: [[CASTFIXEDSVE:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[X:%.*]], i64 0)
// CHECK-512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_INT64:%.*]], %struct.struct_int64* [[S:%.*]], i64 0, i32 1, i64 0
-// CHECK-512-NEXT: store <8 x i64> [[CASTFIXEDSVE]], <8 x i64>* [[ARRAYIDX]], align 16, [[TBAA6]]
+// CHECK-512-NEXT: store <8 x i64> [[CASTFIXEDSVE]], <8 x i64>* [[ARRAYIDX]], align 16, !tbaa [[TBAA6]]
// CHECK-512-NEXT: ret void
//
void write_int64(struct struct_int64 *s, svint64_t x) {
@@ -84,21 +84,21 @@ void write_int64(struct struct_int64 *s, svint64_t x) {
// CHECK-128-LABEL: @read_float64(
// CHECK-128-NEXT: entry:
// CHECK-128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_FLOAT64:%.*]], %struct.struct_float64* [[S:%.*]], i64 0, i32 1, i64 0
-// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16, [[TBAA6]]
+// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16, !tbaa [[TBAA6]]
// CHECK-128-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v2f64(<vscale x 2 x double> undef, <2 x double> [[TMP0]], i64 0)
// CHECK-128-NEXT: ret <vscale x 2 x double> [[CASTSCALABLESVE]]
//
// CHECK-256-LABEL: @read_float64(
// CHECK-256-NEXT: entry:
// CHECK-256-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_FLOAT64:%.*]], %struct.struct_float64* [[S:%.*]], i64 0, i32 1, i64 0
-// CHECK-256-NEXT: [[TMP0:%.*]] = load <4 x double>, <4 x double>* [[ARRAYIDX]], align 16, [[TBAA6]]
+// CHECK-256-NEXT: [[TMP0:%.*]] = load <4 x double>, <4 x double>* [[ARRAYIDX]], align 16, !tbaa [[TBAA6]]
// CHECK-256-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v4f64(<vscale x 2 x double> undef, <4 x double> [[TMP0]], i64 0)
// CHECK-256-NEXT: ret <vscale x 2 x double> [[CASTSCALABLESVE]]
//
// CHECK-512-LABEL: @read_float64(
// CHECK-512-NEXT: entry:
// CHECK-512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_FLOAT64:%.*]], %struct.struct_float64* [[S:%.*]], i64 0, i32 1, i64 0
-// CHECK-512-NEXT: [[TMP0:%.*]] = load <8 x double>, <8 x double>* [[ARRAYIDX]], align 16, [[TBAA6]]
+// CHECK-512-NEXT: [[TMP0:%.*]] = load <8 x double>, <8 x double>* [[ARRAYIDX]], align 16, !tbaa [[TBAA6]]
// CHECK-512-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> [[TMP0]], i64 0)
// CHECK-512-NEXT: ret <vscale x 2 x double> [[CASTSCALABLESVE]]
//
@@ -110,21 +110,21 @@ svfloat64_t read_float64(struct struct_float64 *s) {
// CHECK-128-NEXT: entry:
// CHECK-128-NEXT: [[CASTFIXEDSVE:%.*]] = call <2 x double> @llvm.experimental.vector.extract.v2f64.nxv2f64(<vscale x 2 x double> [[X:%.*]], i64 0)
// CHECK-128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_FLOAT64:%.*]], %struct.struct_float64* [[S:%.*]], i64 0, i32 1, i64 0
-// CHECK-128-NEXT: store <2 x double> [[CASTFIXEDSVE]], <2 x double>* [[ARRAYIDX]], align 16, [[TBAA6]]
+// CHECK-128-NEXT: store <2 x double> [[CASTFIXEDSVE]], <2 x double>* [[ARRAYIDX]], align 16, !tbaa [[TBAA6]]
// CHECK-128-NEXT: ret void
//
// CHECK-256-LABEL: @write_float64(
// CHECK-256-NEXT: entry:
// CHECK-256-NEXT: [[CASTFIXEDSVE:%.*]] = call <4 x double> @llvm.experimental.vector.extract.v4f64.nxv2f64(<vscale x 2 x double> [[X:%.*]], i64 0)
// CHECK-256-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_FLOAT64:%.*]], %struct.struct_float64* [[S:%.*]], i64 0, i32 1, i64 0
-// CHECK-256-NEXT: store <4 x double> [[CASTFIXEDSVE]], <4 x double>* [[ARRAYIDX]], align 16, [[TBAA6]]
+// CHECK-256-NEXT: store <4 x double> [[CASTFIXEDSVE]], <4 x double>* [[ARRAYIDX]], align 16, !tbaa [[TBAA6]]
// CHECK-256-NEXT: ret void
//
// CHECK-512-LABEL: @write_float64(
// CHECK-512-NEXT: entry:
// CHECK-512-NEXT: [[CASTFIXEDSVE:%.*]] = call <8 x double> @llvm.experimental.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[X:%.*]], i64 0)
// CHECK-512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_FLOAT64:%.*]], %struct.struct_float64* [[S:%.*]], i64 0, i32 1, i64 0
-// CHECK-512-NEXT: store <8 x double> [[CASTFIXEDSVE]], <8 x double>* [[ARRAYIDX]], align 16, [[TBAA6]]
+// CHECK-512-NEXT: store <8 x double> [[CASTFIXEDSVE]], <8 x double>* [[ARRAYIDX]], align 16, !tbaa [[TBAA6]]
// CHECK-512-NEXT: ret void
//
void write_float64(struct struct_float64 *s, svfloat64_t x) {
@@ -138,21 +138,21 @@ void write_float64(struct struct_float64 *s, svfloat64_t x) {
// CHECK-128-LABEL: @read_bfloat16(
// CHECK-128-NEXT: entry:
// CHECK-128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BFLOAT16:%.*]], %struct.struct_bfloat16* [[S:%.*]], i64 0, i32 1, i64 0
-// CHECK-128-NEXT: [[TMP0:%.*]] = load <8 x bfloat>, <8 x bfloat>* [[ARRAYIDX]], align 16, [[TBAA6]]
+// CHECK-128-NEXT: [[TMP0:%.*]] = load <8 x bfloat>, <8 x bfloat>* [[ARRAYIDX]], align 16, !tbaa [[TBAA6]]
// CHECK-128-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x bfloat> @llvm.experimental.vector.insert.nxv8bf16.v8bf16(<vscale x 8 x bfloat> undef, <8 x bfloat> [[TMP0]], i64 0)
// CHECK-128-NEXT: ret <vscale x 8 x bfloat> [[CASTSCALABLESVE]]
//
// CHECK-256-LABEL: @read_bfloat16(
// CHECK-256-NEXT: entry:
// CHECK-256-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BFLOAT16:%.*]], %struct.struct_bfloat16* [[S:%.*]], i64 0, i32 1, i64 0
-// CHECK-256-NEXT: [[TMP0:%.*]] = load <16 x bfloat>, <16 x bfloat>* [[ARRAYIDX]], align 16, [[TBAA6]]
+// CHECK-256-NEXT: [[TMP0:%.*]] = load <16 x bfloat>, <16 x bfloat>* [[ARRAYIDX]], align 16, !tbaa [[TBAA6]]
// CHECK-256-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x bfloat> @llvm.experimental.vector.insert.nxv8bf16.v16bf16(<vscale x 8 x bfloat> undef, <16 x bfloat> [[TMP0]], i64 0)
// CHECK-256-NEXT: ret <vscale x 8 x bfloat> [[CASTSCALABLESVE]]
//
// CHECK-512-LABEL: @read_bfloat16(
// CHECK-512-NEXT: entry:
// CHECK-512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BFLOAT16:%.*]], %struct.struct_bfloat16* [[S:%.*]], i64 0, i32 1, i64 0
-// CHECK-512-NEXT: [[TMP0:%.*]] = load <32 x bfloat>, <32 x bfloat>* [[ARRAYIDX]], align 16, [[TBAA6]]
+// CHECK-512-NEXT: [[TMP0:%.*]] = load <32 x bfloat>, <32 x bfloat>* [[ARRAYIDX]], align 16, !tbaa [[TBAA6]]
// CHECK-512-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x bfloat> @llvm.experimental.vector.insert.nxv8bf16.v32bf16(<vscale x 8 x bfloat> undef, <32 x bfloat> [[TMP0]], i64 0)
// CHECK-512-NEXT: ret <vscale x 8 x bfloat> [[CASTSCALABLESVE]]
//
@@ -164,21 +164,21 @@ svbfloat16_t read_bfloat16(struct struct_bfloat16 *s) {
// CHECK-128-NEXT: entry:
// CHECK-128-NEXT: [[CASTFIXEDSVE:%.*]] = call <8 x bfloat> @llvm.experimental.vector.extract.v8bf16.nxv8bf16(<vscale x 8 x bfloat> [[X:%.*]], i64 0)
// CHECK-128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BFLOAT16:%.*]], %struct.struct_bfloat16* [[S:%.*]], i64 0, i32 1, i64 0
-// CHECK-128-NEXT: store <8 x bfloat> [[CASTFIXEDSVE]], <8 x bfloat>* [[ARRAYIDX]], align 16, [[TBAA6]]
+// CHECK-128-NEXT: store <8 x bfloat> [[CASTFIXEDSVE]], <8 x bfloat>* [[ARRAYIDX]], align 16, !tbaa [[TBAA6]]
// CHECK-128-NEXT: ret void
//
// CHECK-256-LABEL: @write_bfloat16(
// CHECK-256-NEXT: entry:
// CHECK-256-NEXT: [[CASTFIXEDSVE:%.*]] = call <16 x bfloat> @llvm.experimental.vector.extract.v16bf16.nxv8bf16(<vscale x 8 x bfloat> [[X:%.*]], i64 0)
// CHECK-256-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BFLOAT16:%.*]], %struct.struct_bfloat16* [[S:%.*]], i64 0, i32 1, i64 0
-// CHECK-256-NEXT: store <16 x bfloat> [[CASTFIXEDSVE]], <16 x bfloat>* [[ARRAYIDX]], align 16, [[TBAA6]]
+// CHECK-256-NEXT: store <16 x bfloat> [[CASTFIXEDSVE]], <16 x bfloat>* [[ARRAYIDX]], align 16, !tbaa [[TBAA6]]
// CHECK-256-NEXT: ret void
//
// CHECK-512-LABEL: @write_bfloat16(
// CHECK-512-NEXT: entry:
// CHECK-512-NEXT: [[CASTFIXEDSVE:%.*]] = call <32 x bfloat> @llvm.experimental.vector.extract.v32bf16.nxv8bf16(<vscale x 8 x bfloat> [[X:%.*]], i64 0)
// CHECK-512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BFLOAT16:%.*]], %struct.struct_bfloat16* [[S:%.*]], i64 0, i32 1, i64 0
-// CHECK-512-NEXT: store <32 x bfloat> [[CASTFIXEDSVE]], <32 x bfloat>* [[ARRAYIDX]], align 16, [[TBAA6]]
+// CHECK-512-NEXT: store <32 x bfloat> [[CASTFIXEDSVE]], <32 x bfloat>* [[ARRAYIDX]], align 16, !tbaa [[TBAA6]]
// CHECK-512-NEXT: ret void
//
void write_bfloat16(struct struct_bfloat16 *s, svbfloat16_t x) {
@@ -191,23 +191,32 @@ void write_bfloat16(struct struct_bfloat16 *s, svbfloat16_t x) {
// CHECK-128-LABEL: @read_bool(
// CHECK-128-NEXT: entry:
+// CHECK-128-NEXT: [[SAVED_VALUE:%.*]] = alloca <2 x i8>, align 16
// CHECK-128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], %struct.struct_bool* [[S:%.*]], i64 0, i32 1, i64 0
-// CHECK-128-NEXT: [[TMP0:%.*]] = bitcast <2 x i8>* [[ARRAYIDX]] to <vscale x 16 x i1>*
-// CHECK-128-NEXT: [[TMP1:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[TMP0]], align 2, [[TBAA6]]
+// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x i8>, <2 x i8>* [[ARRAYIDX]], align 2, !tbaa [[TBAA6]]
+// CHECK-128-NEXT: store <2 x i8> [[TMP0]], <2 x i8>* [[SAVED_VALUE]], align 16, !tbaa [[TBAA6]]
+// CHECK-128-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast <2 x i8>* [[SAVED_VALUE]] to <vscale x 16 x i1>*
+// CHECK-128-NEXT: [[TMP1:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[CASTFIXEDSVE]], align 16, !tbaa [[TBAA6]]
// CHECK-128-NEXT: ret <vscale x 16 x i1> [[TMP1]]
//
// CHECK-256-LABEL: @read_bool(
// CHECK-256-NEXT: entry:
+// CHECK-256-NEXT: [[SAVED_VALUE:%.*]] = alloca <4 x i8>, align 16
// CHECK-256-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], %struct.struct_bool* [[S:%.*]], i64 0, i32 1, i64 0
-// CHECK-256-NEXT: [[TMP0:%.*]] = bitcast <4 x i8>* [[ARRAYIDX]] to <vscale x 16 x i1>*
-// CHECK-256-NEXT: [[TMP1:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[TMP0]], align 2, [[TBAA6]]
+// CHECK-256-NEXT: [[TMP0:%.*]] = load <4 x i8>, <4 x i8>* [[ARRAYIDX]], align 2, !tbaa [[TBAA6]]
+// CHECK-256-NEXT: store <4 x i8> [[TMP0]], <4 x i8>* [[SAVED_VALUE]], align 16, !tbaa [[TBAA6]]
+// CHECK-256-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast <4 x i8>* [[SAVED_VALUE]] to <vscale x 16 x i1>*
+// CHECK-256-NEXT: [[TMP1:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[CASTFIXEDSVE]], align 16, !tbaa [[TBAA6]]
// CHECK-256-NEXT: ret <vscale x 16 x i1> [[TMP1]]
//
// CHECK-512-LABEL: @read_bool(
// CHECK-512-NEXT: entry:
+// CHECK-512-NEXT: [[SAVED_VALUE:%.*]] = alloca <8 x i8>, align 16
// CHECK-512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], %struct.struct_bool* [[S:%.*]], i64 0, i32 1, i64 0
-// CHECK-512-NEXT: [[TMP0:%.*]] = bitcast <8 x i8>* [[ARRAYIDX]] to <vscale x 16 x i1>*
-// CHECK-512-NEXT: [[TMP1:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[TMP0]], align 2, [[TBAA6]]
+// CHECK-512-NEXT: [[TMP0:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 2, !tbaa [[TBAA6]]
+// CHECK-512-NEXT: store <8 x i8> [[TMP0]], <8 x i8>* [[SAVED_VALUE]], align 16, !tbaa [[TBAA6]]
+// CHECK-512-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast <8 x i8>* [[SAVED_VALUE]] to <vscale x 16 x i1>*
+// CHECK-512-NEXT: [[TMP1:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[CASTFIXEDSVE]], align 16, !tbaa [[TBAA6]]
// CHECK-512-NEXT: ret <vscale x 16 x i1> [[TMP1]]
//
svbool_t read_bool(struct struct_bool *s) {
@@ -216,32 +225,32 @@ svbool_t read_bool(struct struct_bool *s) {
// CHECK-128-LABEL: @write_bool(
// CHECK-128-NEXT: entry:
-// CHECK-128-NEXT: [[X_ADDR:%.*]] = alloca <vscale x 16 x i1>, align 16
-// CHECK-128-NEXT: store <vscale x 16 x i1> [[X:%.*]], <vscale x 16 x i1>* [[X_ADDR]], align 16, [[TBAA9:!tbaa !.*]]
-// CHECK-128-NEXT: [[TMP0:%.*]] = bitcast <vscale x 16 x i1>* [[X_ADDR]] to <2 x i8>*
-// CHECK-128-NEXT: [[TMP1:%.*]] = load <2 x i8>, <2 x i8>* [[TMP0]], align 16, [[TBAA6]]
+// CHECK-128-NEXT: [[SAVED_VALUE:%.*]] = alloca <vscale x 16 x i1>, align 16
+// CHECK-128-NEXT: store <vscale x 16 x i1> [[X:%.*]], <vscale x 16 x i1>* [[SAVED_VALUE]], align 16, !tbaa [[TBAA9:![0-9]+]]
+// CHECK-128-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast <vscale x 16 x i1>* [[SAVED_VALUE]] to <2 x i8>*
+// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x i8>, <2 x i8>* [[CASTFIXEDSVE]], align 16, !tbaa [[TBAA6]]
// CHECK-128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], %struct.struct_bool* [[S:%.*]], i64 0, i32 1, i64 0
-// CHECK-128-NEXT: store <2 x i8> [[TMP1]], <2 x i8>* [[ARRAYIDX]], align 2, [[TBAA6]]
+// CHECK-128-NEXT: store <2 x i8> [[TMP0]], <2 x i8>* [[ARRAYIDX]], align 2, !tbaa [[TBAA6]]
// CHECK-128-NEXT: ret void
//
// CHECK-256-LABEL: @write_bool(
// CHECK-256-NEXT: entry:
-// CHECK-256-NEXT: [[X_ADDR:%.*]] = alloca <vscale x 16 x i1>, align 16
-// CHECK-256-NEXT: store <vscale x 16 x i1> [[X:%.*]], <vscale x 16 x i1>* [[X_ADDR]], align 16, [[TBAA9:!tbaa !.*]]
-// CHECK-256-NEXT: [[TMP0:%.*]] = bitcast <vscale x 16 x i1>* [[X_ADDR]] to <4 x i8>*
-// CHECK-256-NEXT: [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* [[TMP0]], align 16, [[TBAA6]]
+// CHECK-256-NEXT: [[SAVED_VALUE:%.*]] = alloca <vscale x 16 x i1>, align 16
+// CHECK-256-NEXT: store <vscale x 16 x i1> [[X:%.*]], <vscale x 16 x i1>* [[SAVED_VALUE]], align 16, !tbaa [[TBAA9:![0-9]+]]
+// CHECK-256-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast <vscale x 16 x i1>* [[SAVED_VALUE]] to <4 x i8>*
+// CHECK-256-NEXT: [[TMP0:%.*]] = load <4 x i8>, <4 x i8>* [[CASTFIXEDSVE]], align 16, !tbaa [[TBAA6]]
// CHECK-256-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], %struct.struct_bool* [[S:%.*]], i64 0, i32 1, i64 0
-// CHECK-256-NEXT: store <4 x i8> [[TMP1]], <4 x i8>* [[ARRAYIDX]], align 2, [[TBAA6]]
+// CHECK-256-NEXT: store <4 x i8> [[TMP0]], <4 x i8>* [[ARRAYIDX]], align 2, !tbaa [[TBAA6]]
// CHECK-256-NEXT: ret void
//
// CHECK-512-LABEL: @write_bool(
// CHECK-512-NEXT: entry:
-// CHECK-512-NEXT: [[X_ADDR:%.*]] = alloca <vscale x 16 x i1>, align 16
-// CHECK-512-NEXT: store <vscale x 16 x i1> [[X:%.*]], <vscale x 16 x i1>* [[X_ADDR]], align 16, [[TBAA9:!tbaa !.*]]
-// CHECK-512-NEXT: [[TMP0:%.*]] = bitcast <vscale x 16 x i1>* [[X_ADDR]] to <8 x i8>*
-// CHECK-512-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]], align 16, [[TBAA6]]
+// CHECK-512-NEXT: [[SAVED_VALUE:%.*]] = alloca <vscale x 16 x i1>, align 16
+// CHECK-512-NEXT: store <vscale x 16 x i1> [[X:%.*]], <vscale x 16 x i1>* [[SAVED_VALUE]], align 16, !tbaa [[TBAA9:![0-9]+]]
+// CHECK-512-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast <vscale x 16 x i1>* [[SAVED_VALUE]] to <8 x i8>*
+// CHECK-512-NEXT: [[TMP0:%.*]] = load <8 x i8>, <8 x i8>* [[CASTFIXEDSVE]], align 16, !tbaa [[TBAA6]]
// CHECK-512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], %struct.struct_bool* [[S:%.*]], i64 0, i32 1, i64 0
-// CHECK-512-NEXT: store <8 x i8> [[TMP1]], <8 x i8>* [[ARRAYIDX]], align 2, [[TBAA6]]
+// CHECK-512-NEXT: store <8 x i8> [[TMP0]], <8 x i8>* [[ARRAYIDX]], align 2, !tbaa [[TBAA6]]
// CHECK-512-NEXT: ret void
//
void write_bool(struct struct_bool *s, svbool_t x) {
diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
index edc307745a2aa..136647cada3ad 100644
--- a/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
+++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
@@ -79,9 +79,9 @@ fixed_float64_t call_float64_ff(svbool_t pg, fixed_float64_t op1, fixed_float64_
// CHECK-NEXT: entry:
// CHECK-NEXT: [[OP1:%.*]] = alloca <8 x i8>, align 16
// CHECK-NEXT: [[OP2:%.*]] = alloca <8 x i8>, align 16
-// CHECK-NEXT: [[OP1_ADDR:%.*]] = alloca <8 x i8>, align 16
-// CHECK-NEXT: [[OP2_ADDR:%.*]] = alloca <8 x i8>, align 16
-// CHECK-NEXT: [[SAVED_CALL_RVALUE:%.*]] = alloca <vscale x 16 x i1>, align 16
+// CHECK-NEXT: [[SAVED_VALUE:%.*]] = alloca <8 x i8>, align 16
+// CHECK-NEXT: [[SAVED_VALUE3:%.*]] = alloca <8 x i8>, align 16
+// CHECK-NEXT: [[SAVED_VALUE5:%.*]] = alloca <vscale x 16 x i1>, align 16
// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 16 x i1>, align 16
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8>* [[OP1]] to <vscale x 16 x i1>*
// CHECK-NEXT: store <vscale x 16 x i1> [[OP1_COERCE:%.*]], <vscale x 16 x i1>* [[TMP0]], align 16
@@ -89,20 +89,20 @@ fixed_float64_t call_float64_ff(svbool_t pg, fixed_float64_t op1, fixed_float64_
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8>* [[OP2]] to <vscale x 16 x i1>*
// CHECK-NEXT: store <vscale x 16 x i1> [[OP2_COERCE:%.*]], <vscale x 16 x i1>* [[TMP1]], align 16
// CHECK-NEXT: [[OP22:%.*]] = load <8 x i8>, <8 x i8>* [[OP2]], align 16, !tbaa [[TBAA6]]
-// CHECK-NEXT: store <8 x i8> [[OP11]], <8 x i8>* [[OP1_ADDR]], align 16, !tbaa [[TBAA6]]
-// CHECK-NEXT: store <8 x i8> [[OP22]], <8 x i8>* [[OP2_ADDR]], align 16, !tbaa [[TBAA6]]
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8>* [[OP1_ADDR]] to <vscale x 16 x i1>*
-// CHECK-NEXT: [[TMP3:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[TMP2]], align 16, !tbaa [[TBAA6]]
-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8>* [[OP2_ADDR]] to <vscale x 16 x i1>*
-// CHECK-NEXT: [[TMP5:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[TMP4]], align 16, !tbaa [[TBAA6]]
-// CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.sel.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]])
-// CHECK-NEXT: store <vscale x 16 x i1> [[TMP6]], <vscale x 16 x i1>* [[SAVED_CALL_RVALUE]], align 16, !tbaa [[TBAA9:![0-9]+]]
-// CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast <vscale x 16 x i1>* [[SAVED_CALL_RVALUE]] to <8 x i8>*
-// CHECK-NEXT: [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[CASTFIXEDSVE]], align 16, !tbaa [[TBAA6]]
+// CHECK-NEXT: store <8 x i8> [[OP11]], <8 x i8>* [[SAVED_VALUE]], align 16, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast <8 x i8>* [[SAVED_VALUE]] to <vscale x 16 x i1>*
+// CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[CASTFIXEDSVE]], align 16, !tbaa [[TBAA6]]
+// CHECK-NEXT: store <8 x i8> [[OP22]], <8 x i8>* [[SAVED_VALUE3]], align 16, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[CASTFIXEDSVE4:%.*]] = bitcast <8 x i8>* [[SAVED_VALUE3]] to <vscale x 16 x i1>*
+// CHECK-NEXT: [[TMP3:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[CASTFIXEDSVE4]], align 16, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.sel.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[TMP2]], <vscale x 16 x i1> [[TMP3]])
+// CHECK-NEXT: store <vscale x 16 x i1> [[TMP4]], <vscale x 16 x i1>* [[SAVED_VALUE5]], align 16, !tbaa [[TBAA9:![0-9]+]]
+// CHECK-NEXT: [[CASTFIXEDSVE6:%.*]] = bitcast <vscale x 16 x i1>* [[SAVED_VALUE5]] to <8 x i8>*
+// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[CASTFIXEDSVE6]], align 16, !tbaa [[TBAA6]]
// CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast <vscale x 16 x i1>* [[RETVAL_COERCE]] to <8 x i8>*
-// CHECK-NEXT: store <8 x i8> [[TMP7]], <8 x i8>* [[RETVAL_0__SROA_CAST]], align 16
-// CHECK-NEXT: [[TMP8:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[RETVAL_COERCE]], align 16
-// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP8]]
+// CHECK-NEXT: store <8 x i8> [[TMP5]], <8 x i8>* [[RETVAL_0__SROA_CAST]], align 16
+// CHECK-NEXT: [[TMP6:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[RETVAL_COERCE]], align 16
+// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP6]]
//
fixed_bool_t call_bool_ff(svbool_t pg, fixed_bool_t op1, fixed_bool_t op2) {
return svsel(pg, op1, op2);
@@ -135,23 +135,23 @@ fixed_float64_t call_float64_fs(svbool_t pg, fixed_float64_t op1, svfloat64_t op
// CHECK-LABEL: @call_bool_fs(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[OP1:%.*]] = alloca <8 x i8>, align 16
-// CHECK-NEXT: [[OP1_ADDR:%.*]] = alloca <8 x i8>, align 16
-// CHECK-NEXT: [[SAVED_CALL_RVALUE:%.*]] = alloca <vscale x 16 x i1>, align 16
+// CHECK-NEXT: [[SAVED_VALUE:%.*]] = alloca <8 x i8>, align 16
+// CHECK-NEXT: [[SAVED_VALUE2:%.*]] = alloca <vscale x 16 x i1>, align 16
// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 16 x i1>, align 16
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8>* [[OP1]] to <vscale x 16 x i1>*
// CHECK-NEXT: store <vscale x 16 x i1> [[OP1_COERCE:%.*]], <vscale x 16 x i1>* [[TMP0]], align 16
// CHECK-NEXT: [[OP11:%.*]] = load <8 x i8>, <8 x i8>* [[OP1]], align 16, !tbaa [[TBAA6]]
-// CHECK-NEXT: store <8 x i8> [[OP11]], <8 x i8>* [[OP1_ADDR]], align 16, !tbaa [[TBAA6]]
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8>* [[OP1_ADDR]] to <vscale x 16 x i1>*
-// CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[TMP1]], align 16, !tbaa [[TBAA6]]
-// CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.sel.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[TMP2]], <vscale x 16 x i1> [[OP2:%.*]])
-// CHECK-NEXT: store <vscale x 16 x i1> [[TMP3]], <vscale x 16 x i1>* [[SAVED_CALL_RVALUE]], align 16, !tbaa [[TBAA9]]
-// CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast <vscale x 16 x i1>* [[SAVED_CALL_RVALUE]] to <8 x i8>*
-// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[CASTFIXEDSVE]], align 16, !tbaa [[TBAA6]]
+// CHECK-NEXT: store <8 x i8> [[OP11]], <8 x i8>* [[SAVED_VALUE]], align 16, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast <8 x i8>* [[SAVED_VALUE]] to <vscale x 16 x i1>*
+// CHECK-NEXT: [[TMP1:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[CASTFIXEDSVE]], align 16, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.sel.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[TMP1]], <vscale x 16 x i1> [[OP2:%.*]])
+// CHECK-NEXT: store <vscale x 16 x i1> [[TMP2]], <vscale x 16 x i1>* [[SAVED_VALUE2]], align 16, !tbaa [[TBAA9]]
+// CHECK-NEXT: [[CASTFIXEDSVE3:%.*]] = bitcast <vscale x 16 x i1>* [[SAVED_VALUE2]] to <8 x i8>*
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[CASTFIXEDSVE3]], align 16, !tbaa [[TBAA6]]
// CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast <vscale x 16 x i1>* [[RETVAL_COERCE]] to <8 x i8>*
-// CHECK-NEXT: store <8 x i8> [[TMP4]], <8 x i8>* [[RETVAL_0__SROA_CAST]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[RETVAL_COERCE]], align 16
-// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP5]]
+// CHECK-NEXT: store <8 x i8> [[TMP3]], <8 x i8>* [[RETVAL_0__SROA_CAST]], align 16
+// CHECK-NEXT: [[TMP4:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[RETVAL_COERCE]], align 16
+// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP4]]
//
fixed_bool_t call_bool_fs(svbool_t pg, fixed_bool_t op1, svbool_t op2) {
return svsel(pg, op1, op2);
@@ -183,11 +183,11 @@ fixed_float64_t call_float64_ss(svbool_t pg, svfloat64_t op1, svfloat64_t op2) {
// CHECK-LABEL: @call_bool_ss(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[SAVED_CALL_RVALUE:%.*]] = alloca <vscale x 16 x i1>, align 16
+// CHECK-NEXT: [[SAVED_VALUE:%.*]] = alloca <vscale x 16 x i1>, align 16
// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 16 x i1>, align 16
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.sel.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
-// CHECK-NEXT: store <vscale x 16 x i1> [[TMP0]], <vscale x 16 x i1>* [[SAVED_CALL_RVALUE]], align 16, !tbaa [[TBAA9]]
-// CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast <vscale x 16 x i1>* [[SAVED_CALL_RVALUE]] to <8 x i8>*
+// CHECK-NEXT: store <vscale x 16 x i1> [[TMP0]], <vscale x 16 x i1>* [[SAVED_VALUE]], align 16, !tbaa [[TBAA9]]
+// CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast <vscale x 16 x i1>* [[SAVED_VALUE]] to <8 x i8>*
// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[CASTFIXEDSVE]], align 16, !tbaa [[TBAA6]]
// CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast <vscale x 16 x i1>* [[RETVAL_COERCE]] to <8 x i8>*
// CHECK-NEXT: store <8 x i8> [[TMP1]], <8 x i8>* [[RETVAL_0__SROA_CAST]], align 16
diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c
index e9b9c1f9c385b..a7f275bd1f0b4 100644
--- a/clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c
+++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c
@@ -17,13 +17,19 @@ fixed_int32_t global_vec;
// CHECK-NEXT: [[PRED_ADDR:%.*]] = alloca <vscale x 16 x i1>, align 2
// CHECK-NEXT: [[VEC_ADDR:%.*]] = alloca <vscale x 4 x i32>, align 16
// CHECK-NEXT: [[PG:%.*]] = alloca <vscale x 16 x i1>, align 2
+// CHECK-NEXT: [[SAVED_VALUE:%.*]] = alloca <8 x i8>, align 8
+// CHECK-NEXT: [[SAVED_VALUE1:%.*]] = alloca <8 x i8>, align 8
// CHECK-NEXT: store <vscale x 16 x i1> [[PRED:%.*]], <vscale x 16 x i1>* [[PRED_ADDR]], align 2
// CHECK-NEXT: store <vscale x 4 x i32> [[VEC:%.*]], <vscale x 4 x i32>* [[VEC_ADDR]], align 16
// CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[PRED_ADDR]], align 2
// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* @global_pred, align 2
-// CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* bitcast (<8 x i8>* @global_pred to <vscale x 16 x i1>*), align 2
+// CHECK-NEXT: store <8 x i8> [[TMP1]], <8 x i8>* [[SAVED_VALUE]], align 8
+// CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast <8 x i8>* [[SAVED_VALUE]] to <vscale x 16 x i1>*
+// CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[CASTFIXEDSVE]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* @global_pred, align 2
-// CHECK-NEXT: [[TMP4:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* bitcast (<8 x i8>* @global_pred to <vscale x 16 x i1>*), align 2
+// CHECK-NEXT: store <8 x i8> [[TMP3]], <8 x i8>* [[SAVED_VALUE1]], align 8
+// CHECK-NEXT: [[CASTFIXEDSVE2:%.*]] = bitcast <8 x i8>* [[SAVED_VALUE1]] to <vscale x 16 x i1>*
+// CHECK-NEXT: [[TMP4:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[CASTFIXEDSVE2]], align 8
// CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> [[TMP0]], <vscale x 16 x i1> [[TMP2]], <vscale x 16 x i1> [[TMP4]])
// CHECK-NEXT: store <vscale x 16 x i1> [[TMP5]], <vscale x 16 x i1>* [[PG]], align 2
// CHECK-NEXT: [[TMP6:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[PG]], align 2
@@ -32,11 +38,11 @@ fixed_int32_t global_vec;
// CHECK-NEXT: [[TMP8:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[VEC_ADDR]], align 16
// CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[TMP6]])
// CHECK-NEXT: [[TMP10:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1> [[TMP9]], <vscale x 4 x i32> [[CASTSCALABLESVE]], <vscale x 4 x i32> [[TMP8]])
-// CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[TMP10]], i64 0)
-// CHECK-NEXT: store <16 x i32> [[CASTFIXEDSVE]], <16 x i32>* [[RETVAL]], align 16
+// CHECK-NEXT: [[CASTFIXEDSVE3:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[TMP10]], i64 0)
+// CHECK-NEXT: store <16 x i32> [[CASTFIXEDSVE3]], <16 x i32>* [[RETVAL]], align 16
// CHECK-NEXT: [[TMP11:%.*]] = load <16 x i32>, <16 x i32>* [[RETVAL]], align 16
-// CHECK-NEXT: [[CASTSCALABLESVE1:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[TMP11]], i64 0)
-// CHECK-NEXT: ret <vscale x 4 x i32> [[CASTSCALABLESVE1]]
+// CHECK-NEXT: [[CASTSCALABLESVE4:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[TMP11]], i64 0)
+// CHECK-NEXT: ret <vscale x 4 x i32> [[CASTSCALABLESVE4]]
//
fixed_int32_t foo(svbool_t pred, svint32_t vec) {
svbool_t pg = svand_z(pred, global_pred, global_pred);
@@ -103,3 +109,49 @@ fixed_bool_t address_of_array_idx() {
parr = &arr[0];
return *parr;
}
+
+// CHECK-LABEL: @test_cast(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <16 x i32>, align 16
+// CHECK-NEXT: [[PRED_ADDR:%.*]] = alloca <vscale x 16 x i1>, align 2
+// CHECK-NEXT: [[VEC_ADDR:%.*]] = alloca <vscale x 4 x i32>, align 16
+// CHECK-NEXT: [[XX:%.*]] = alloca <16 x i32>, align 16
+// CHECK-NEXT: [[YY:%.*]] = alloca <16 x i32>, align 16
+// CHECK-NEXT: [[PG:%.*]] = alloca <vscale x 16 x i1>, align 2
+// CHECK-NEXT: [[SAVED_VALUE:%.*]] = alloca <8 x i8>, align 8
+// CHECK-NEXT: [[SAVED_VALUE1:%.*]] = alloca <16 x i32>, align 64
+// CHECK-NEXT: store <vscale x 16 x i1> [[PRED:%.*]], <vscale x 16 x i1>* [[PRED_ADDR]], align 2
+// CHECK-NEXT: store <vscale x 4 x i32> [[VEC:%.*]], <vscale x 4 x i32>* [[VEC_ADDR]], align 16
+// CHECK-NEXT: store <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, <16 x i32>* [[XX]], align 16
+// CHECK-NEXT: store <16 x i32> <i32 2, i32 5, i32 4, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, <16 x i32>* [[YY]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[PRED_ADDR]], align 2
+// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* @global_pred, align 2
+// CHECK-NEXT: store <8 x i8> [[TMP1]], <8 x i8>* [[SAVED_VALUE]], align 8
+// CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast <8 x i8>* [[SAVED_VALUE]] to <vscale x 16 x i1>*
+// CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[CASTFIXEDSVE]], align 8
+// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, <16 x i32>* [[XX]], align 16
+// CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, <16 x i32>* [[YY]], align 16
+// CHECK-NEXT: [[ADD:%.*]] = add <16 x i32> [[TMP3]], [[TMP4]]
+// CHECK-NEXT: store <16 x i32> [[ADD]], <16 x i32>* [[SAVED_VALUE1]], align 64
+// CHECK-NEXT: [[CASTFIXEDSVE2:%.*]] = bitcast <16 x i32>* [[SAVED_VALUE1]] to <vscale x 16 x i1>*
+// CHECK-NEXT: [[TMP5:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[CASTFIXEDSVE2]], align 64
+// CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> [[TMP0]], <vscale x 16 x i1> [[TMP2]], <vscale x 16 x i1> [[TMP5]])
+// CHECK-NEXT: store <vscale x 16 x i1> [[TMP6]], <vscale x 16 x i1>* [[PG]], align 2
+// CHECK-NEXT: [[TMP7:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[PG]], align 2
+// CHECK-NEXT: [[TMP8:%.*]] = load <16 x i32>, <16 x i32>* @global_vec, align 16
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[TMP8]], i64 0)
+// CHECK-NEXT: [[TMP9:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[VEC_ADDR]], align 16
+// CHECK-NEXT: [[TMP10:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[TMP7]])
+// CHECK-NEXT: [[TMP11:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1> [[TMP10]], <vscale x 4 x i32> [[CASTSCALABLESVE]], <vscale x 4 x i32> [[TMP9]])
+// CHECK-NEXT: [[CASTFIXEDSVE3:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[TMP11]], i64 0)
+// CHECK-NEXT: store <16 x i32> [[CASTFIXEDSVE3]], <16 x i32>* [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP12:%.*]] = load <16 x i32>, <16 x i32>* [[RETVAL]], align 16
+// CHECK-NEXT: [[CASTSCALABLESVE4:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[TMP12]], i64 0)
+// CHECK-NEXT: ret <vscale x 4 x i32> [[CASTSCALABLESVE4]]
+//
+fixed_int32_t test_cast(svbool_t pred, svint32_t vec) {
+ fixed_int32_t xx = {1, 2, 3, 4};
+ fixed_int32_t yy = {2, 5, 4, 6};
+ svbool_t pg = svand_z(pred, global_pred, xx + yy);
+ return svadd_m(pg, global_vec, vec);
+}
diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c
index d81cd3627b2b0..0171c763ef512 100644
--- a/clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c
+++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c
@@ -22,13 +22,13 @@ fixed_bool_t global_bool;
// CHECK-128-LABEL: @write_global_i64(
// CHECK-128-NEXT: entry:
// CHECK-128-NEXT: [[CASTFIXEDSVE:%.*]] = call <2 x i64> @llvm.experimental.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64> [[V:%.*]], i64 0)
-// CHECK-128-NEXT: store <2 x i64> [[CASTFIXEDSVE]], <2 x i64>* @global_i64, align 16, [[TBAA6:!tbaa !.*]]
+// CHECK-128-NEXT: store <2 x i64> [[CASTFIXEDSVE]], <2 x i64>* @global_i64, align 16, !tbaa [[TBAA6:![0-9]+]]
// CHECK-128-NEXT: ret void
//
// CHECK-512-LABEL: @write_global_i64(
// CHECK-512-NEXT: entry:
// CHECK-512-NEXT: [[CASTFIXEDSVE:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[V:%.*]], i64 0)
-// CHECK-512-NEXT: store <8 x i64> [[CASTFIXEDSVE]], <8 x i64>* @global_i64, align 16, [[TBAA6:!tbaa !.*]]
+// CHECK-512-NEXT: store <8 x i64> [[CASTFIXEDSVE]], <8 x i64>* @global_i64, align 16, !tbaa [[TBAA6:![0-9]+]]
// CHECK-512-NEXT: ret void
//
void write_global_i64(svint64_t v) { global_i64 = v; }
@@ -36,33 +36,33 @@ void write_global_i64(svint64_t v) { global_i64 = v; }
// CHECK-128-LABEL: @write_global_bf16(
// CHECK-128-NEXT: entry:
// CHECK-128-NEXT: [[CASTFIXEDSVE:%.*]] = call <8 x bfloat> @llvm.experimental.vector.extract.v8bf16.nxv8bf16(<vscale x 8 x bfloat> [[V:%.*]], i64 0)
-// CHECK-128-NEXT: store <8 x bfloat> [[CASTFIXEDSVE]], <8 x bfloat>* @global_bf16, align 16, [[TBAA6]]
+// CHECK-128-NEXT: store <8 x bfloat> [[CASTFIXEDSVE]], <8 x bfloat>* @global_bf16, align 16, !tbaa [[TBAA6]]
// CHECK-128-NEXT: ret void
//
// CHECK-512-LABEL: @write_global_bf16(
// CHECK-512-NEXT: entry:
// CHECK-512-NEXT: [[CASTFIXEDSVE:%.*]] = call <32 x bfloat> @llvm.experimental.vector.extract.v32bf16.nxv8bf16(<vscale x 8 x bfloat> [[V:%.*]], i64 0)
-// CHECK-512-NEXT: store <32 x bfloat> [[CASTFIXEDSVE]], <32 x bfloat>* @global_bf16, align 16, [[TBAA6]]
+// CHECK-512-NEXT: store <32 x bfloat> [[CASTFIXEDSVE]], <32 x bfloat>* @global_bf16, align 16, !tbaa [[TBAA6]]
// CHECK-512-NEXT: ret void
//
void write_global_bf16(svbfloat16_t v) { global_bf16 = v; }
// CHECK-128-LABEL: @write_global_bool(
// CHECK-128-NEXT: entry:
-// CHECK-128-NEXT: [[V_ADDR:%.*]] = alloca <vscale x 16 x i1>, align 16
-// CHECK-128-NEXT: store <vscale x 16 x i1> [[V:%.*]], <vscale x 16 x i1>* [[V_ADDR]], align 16, [[TBAA9:!tbaa !.*]]
-// CHECK-128-NEXT: [[TMP0:%.*]] = bitcast <vscale x 16 x i1>* [[V_ADDR]] to <2 x i8>*
-// CHECK-128-NEXT: [[TMP1:%.*]] = load <2 x i8>, <2 x i8>* [[TMP0]], align 16, [[TBAA6]]
-// CHECK-128-NEXT: store <2 x i8> [[TMP1]], <2 x i8>* @global_bool, align 2, [[TBAA6]]
+// CHECK-128-NEXT: [[SAVED_VALUE:%.*]] = alloca <vscale x 16 x i1>, align 16
+// CHECK-128-NEXT: store <vscale x 16 x i1> [[V:%.*]], <vscale x 16 x i1>* [[SAVED_VALUE]], align 16, !tbaa [[TBAA9:![0-9]+]]
+// CHECK-128-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast <vscale x 16 x i1>* [[SAVED_VALUE]] to <2 x i8>*
+// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x i8>, <2 x i8>* [[CASTFIXEDSVE]], align 16, !tbaa [[TBAA6]]
+// CHECK-128-NEXT: store <2 x i8> [[TMP0]], <2 x i8>* @global_bool, align 2, !tbaa [[TBAA6]]
// CHECK-128-NEXT: ret void
//
// CHECK-512-LABEL: @write_global_bool(
// CHECK-512-NEXT: entry:
-// CHECK-512-NEXT: [[V_ADDR:%.*]] = alloca <vscale x 16 x i1>, align 16
-// CHECK-512-NEXT: store <vscale x 16 x i1> [[V:%.*]], <vscale x 16 x i1>* [[V_ADDR]], align 16, [[TBAA9:!tbaa !.*]]
-// CHECK-512-NEXT: [[TMP0:%.*]] = bitcast <vscale x 16 x i1>* [[V_ADDR]] to <8 x i8>*
-// CHECK-512-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]], align 16, [[TBAA6]]
-// CHECK-512-NEXT: store <8 x i8> [[TMP1]], <8 x i8>* @global_bool, align 2, [[TBAA6]]
+// CHECK-512-NEXT: [[SAVED_VALUE:%.*]] = alloca <vscale x 16 x i1>, align 16
+// CHECK-512-NEXT: store <vscale x 16 x i1> [[V:%.*]], <vscale x 16 x i1>* [[SAVED_VALUE]], align 16, !tbaa [[TBAA9:![0-9]+]]
+// CHECK-512-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast <vscale x 16 x i1>* [[SAVED_VALUE]] to <8 x i8>*
+// CHECK-512-NEXT: [[TMP0:%.*]] = load <8 x i8>, <8 x i8>* [[CASTFIXEDSVE]], align 16, !tbaa [[TBAA6]]
+// CHECK-512-NEXT: store <8 x i8> [[TMP0]], <8 x i8>* @global_bool, align 2, !tbaa [[TBAA6]]
// CHECK-512-NEXT: ret void
//
void write_global_bool(svbool_t v) { global_bool = v; }
@@ -73,13 +73,13 @@ void write_global_bool(svbool_t v) { global_bool = v; }
// CHECK-128-LABEL: @read_global_i64(
// CHECK-128-NEXT: entry:
-// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x i64>, <2 x i64>* @global_i64, align 16, [[TBAA6]]
+// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x i64>, <2 x i64>* @global_i64, align 16, !tbaa [[TBAA6]]
// CHECK-128-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> undef, <2 x i64> [[TMP0]], i64 0)
// CHECK-128-NEXT: ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
//
// CHECK-512-LABEL: @read_global_i64(
// CHECK-512-NEXT: entry:
-// CHECK-512-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* @global_i64, align 16, [[TBAA6]]
+// CHECK-512-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* @global_i64, align 16, !tbaa [[TBAA6]]
// CHECK-512-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[TMP0]], i64 0)
// CHECK-512-NEXT: ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
//
@@ -87,13 +87,13 @@ svint64_t read_global_i64() { return global_i64; }
// CHECK-128-LABEL: @read_global_bf16(
// CHECK-128-NEXT: entry:
-// CHECK-128-NEXT: [[TMP0:%.*]] = load <8 x bfloat>, <8 x bfloat>* @global_bf16, align 16, [[TBAA6]]
+// CHECK-128-NEXT: [[TMP0:%.*]] = load <8 x bfloat>, <8 x bfloat>* @global_bf16, align 16, !tbaa [[TBAA6]]
// CHECK-128-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x bfloat> @llvm.experimental.vector.insert.nxv8bf16.v8bf16(<vscale x 8 x bfloat> undef, <8 x bfloat> [[TMP0]], i64 0)
// CHECK-128-NEXT: ret <vscale x 8 x bfloat> [[CASTSCALABLESVE]]
//
// CHECK-512-LABEL: @read_global_bf16(
// CHECK-512-NEXT: entry:
-// CHECK-512-NEXT: [[TMP0:%.*]] = load <32 x bfloat>, <32 x bfloat>* @global_bf16, align 16, [[TBAA6]]
+// CHECK-512-NEXT: [[TMP0:%.*]] = load <32 x bfloat>, <32 x bfloat>* @global_bf16, align 16, !tbaa [[TBAA6]]
// CHECK-512-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x bfloat> @llvm.experimental.vector.insert.nxv8bf16.v32bf16(<vscale x 8 x bfloat> undef, <32 x bfloat> [[TMP0]], i64 0)
// CHECK-512-NEXT: ret <vscale x 8 x bfloat> [[CASTSCALABLESVE]]
//
@@ -101,12 +101,20 @@ svbfloat16_t read_global_bf16() { return global_bf16; }
// CHECK-128-LABEL: @read_global_bool(
// CHECK-128-NEXT: entry:
-// CHECK-128-NEXT: [[TMP0:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* bitcast (<2 x i8>* @global_bool to <vscale x 16 x i1>*), align 2, [[TBAA6]]
-// CHECK-128-NEXT: ret <vscale x 16 x i1> [[TMP0]]
+// CHECK-128-NEXT: [[SAVED_VALUE:%.*]] = alloca <2 x i8>, align 16
+// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x i8>, <2 x i8>* @global_bool, align 2, !tbaa [[TBAA6]]
+// CHECK-128-NEXT: store <2 x i8> [[TMP0]], <2 x i8>* [[SAVED_VALUE]], align 16, !tbaa [[TBAA6]]
+// CHECK-128-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast <2 x i8>* [[SAVED_VALUE]] to <vscale x 16 x i1>*
+// CHECK-128-NEXT: [[TMP1:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[CASTFIXEDSVE]], align 16, !tbaa [[TBAA6]]
+// CHECK-128-NEXT: ret <vscale x 16 x i1> [[TMP1]]
//
// CHECK-512-LABEL: @read_global_bool(
// CHECK-512-NEXT: entry:
-// CHECK-512-NEXT: [[TMP0:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* bitcast (<8 x i8>* @global_bool to <vscale x 16 x i1>*), align 2, [[TBAA6]]
-// CHECK-512-NEXT: ret <vscale x 16 x i1> [[TMP0]]
+// CHECK-512-NEXT: [[SAVED_VALUE:%.*]] = alloca <8 x i8>, align 16
+// CHECK-512-NEXT: [[TMP0:%.*]] = load <8 x i8>, <8 x i8>* @global_bool, align 2, !tbaa [[TBAA6]]
+// CHECK-512-NEXT: store <8 x i8> [[TMP0]], <8 x i8>* [[SAVED_VALUE]], align 16, !tbaa [[TBAA6]]
+// CHECK-512-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast <8 x i8>* [[SAVED_VALUE]] to <vscale x 16 x i1>*
+// CHECK-512-NEXT: [[TMP1:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[CASTFIXEDSVE]], align 16, !tbaa [[TBAA6]]
+// CHECK-512-NEXT: ret <vscale x 16 x i1> [[TMP1]]
//
svbool_t read_global_bool() { return global_bool; }