[clang] 0b41238 - [AArch64] Emit TBAA metadata for SVE load/store intrinsics
Sander de Smalen via cfe-commits
cfe-commits at lists.llvm.org
Fri Feb 11 01:01:01 PST 2022
Author: Sander de Smalen
Date: 2022-02-11T09:00:29Z
New Revision: 0b41238ae7f91bcc907a577377caa70721ffc400
URL: https://github.com/llvm/llvm-project/commit/0b41238ae7f91bcc907a577377caa70721ffc400
DIFF: https://github.com/llvm/llvm-project/commit/0b41238ae7f91bcc907a577377caa70721ffc400.diff
LOG: [AArch64] Emit TBAA metadata for SVE load/store intrinsics
In Clang we can attach TBAA metadata based on the load/store intrinsics
based on the operation's element type.
This also contains changes to InstCombine where the AArch64-specific
intrinsics are transformed into generic LLVM load/store operations,
to ensure that all metadata is transferred to the new instruction.
There will be some further work after this patch to also emit TBAA
metadata for SVE's gather/scatter- and struct load/store intrinsics.
Reviewed By: paulwalker-arm
Differential Revision: https://reviews.llvm.org/D119319
Added:
Modified:
clang/lib/CodeGen/CGBuiltin.cpp
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1.c
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-loadstore.ll
Removed:
################################################################################
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 310a1ea583ce..9705c118246f 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -9028,7 +9028,10 @@ Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E,
BasePtr = Builder.CreateBitCast(BasePtr, MemEltTy->getPointerTo());
Function *F = CGM.getIntrinsic(BuiltinID, MemoryTy);
- Value *Load = Builder.CreateCall(F, {Predicate, BasePtr});
+ auto *Load =
+ cast<llvm::Instruction>(Builder.CreateCall(F, {Predicate, BasePtr}));
+ auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
+ CGM.DecorateInstructionWithTBAA(Load, TBAAInfo);
return IsZExtReturn ? Builder.CreateZExt(Load, VectorTy)
: Builder.CreateSExt(Load, VectorTy);
@@ -9056,7 +9059,11 @@ Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E,
BasePtr = Builder.CreateBitCast(BasePtr, MemEltTy->getPointerTo());
Function *F = CGM.getIntrinsic(BuiltinID, MemoryTy);
- return Builder.CreateCall(F, {Val, Predicate, BasePtr});
+ auto *Store =
+ cast<llvm::Instruction>(Builder.CreateCall(F, {Val, Predicate, BasePtr}));
+ auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
+ CGM.DecorateInstructionWithTBAA(Store, TBAAInfo);
+ return Store;
}
// Limit the usage of scalable llvm IR generated by the ACLE by using the
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1.c
index 0fde476e9fe0..4620c0e28c69 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1.c
@@ -17,7 +17,7 @@
// CHECK-LABEL: @test_svld1_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* [[TMP0]], i32 1, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> zeroinitializer)
+// CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* [[TMP0]], i32 1, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> zeroinitializer), !tbaa ![[CHAR:[0-9]+]]
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z13test_svld1_s8u10__SVBool_tPKa(
@@ -35,7 +35,7 @@ svint8_t test_svld1_s8(svbool_t pg, const int8_t *base)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>* [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> zeroinitializer)
+// CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>* [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> zeroinitializer), !tbaa ![[SHORT:[0-9]+]]
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]]
//
// CPP-CHECK-LABEL: @_Z14test_svld1_s16u10__SVBool_tPKs(
@@ -54,7 +54,7 @@ svint16_t test_svld1_s16(svbool_t pg, const int16_t *base)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> zeroinitializer)
+// CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> zeroinitializer), !tbaa ![[INT:[0-9]+]]
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
//
// CPP-CHECK-LABEL: @_Z14test_svld1_s32u10__SVBool_tPKi(
@@ -73,7 +73,7 @@ svint32_t test_svld1_s32(svbool_t pg, const int32_t *base)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> zeroinitializer)
+// CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> zeroinitializer), !tbaa ![[LONG:[0-9]+]]
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
//
// CPP-CHECK-LABEL: @_Z14test_svld1_s64u10__SVBool_tPKl(
@@ -91,7 +91,7 @@ svint64_t test_svld1_s64(svbool_t pg, const int64_t *base)
// CHECK-LABEL: @test_svld1_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* [[TMP0]], i32 1, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> zeroinitializer)
+// CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* [[TMP0]], i32 1, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> zeroinitializer), !tbaa ![[CHAR]]
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z13test_svld1_u8u10__SVBool_tPKh(
@@ -109,7 +109,7 @@ svuint8_t test_svld1_u8(svbool_t pg, const uint8_t *base)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>* [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> zeroinitializer)
+// CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>* [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> zeroinitializer), !tbaa ![[SHORT]]
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]]
//
// CPP-CHECK-LABEL: @_Z14test_svld1_u16u10__SVBool_tPKt(
@@ -128,7 +128,7 @@ svuint16_t test_svld1_u16(svbool_t pg, const uint16_t *base)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> zeroinitializer)
+// CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> zeroinitializer), !tbaa ![[INT]]
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
//
// CPP-CHECK-LABEL: @_Z14test_svld1_u32u10__SVBool_tPKj(
@@ -147,7 +147,7 @@ svuint32_t test_svld1_u32(svbool_t pg, const uint32_t *base)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> zeroinitializer)
+// CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> zeroinitializer), !tbaa ![[LONG]]
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
//
// CPP-CHECK-LABEL: @_Z14test_svld1_u64u10__SVBool_tPKm(
@@ -166,7 +166,7 @@ svuint64_t test_svld1_u64(svbool_t pg, const uint64_t *base)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 8 x half>*
-// CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 8 x half> @llvm.masked.load.nxv8f16.p0nxv8f16(<vscale x 8 x half>* [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> zeroinitializer)
+// CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 8 x half> @llvm.masked.load.nxv8f16.p0nxv8f16(<vscale x 8 x half>* [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> zeroinitializer), !tbaa ![[HALF:[0-9]+]]
// CHECK-NEXT: ret <vscale x 8 x half> [[TMP2]]
//
// CPP-CHECK-LABEL: @_Z14test_svld1_f16u10__SVBool_tPKDh(
@@ -185,7 +185,7 @@ svfloat16_t test_svld1_f16(svbool_t pg, const float16_t *base)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 4 x float>*
-// CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> zeroinitializer)
+// CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> zeroinitializer), !tbaa ![[FLOAT:[0-9]+]]
// CHECK-NEXT: ret <vscale x 4 x float> [[TMP2]]
//
// CPP-CHECK-LABEL: @_Z14test_svld1_f32u10__SVBool_tPKf(
@@ -204,7 +204,7 @@ svfloat32_t test_svld1_f32(svbool_t pg, const float32_t *base)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 2 x double>*
-// CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0nxv2f64(<vscale x 2 x double>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> zeroinitializer)
+// CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0nxv2f64(<vscale x 2 x double>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> zeroinitializer), !tbaa ![[DOUBLE:[0-9]+]]
// CHECK-NEXT: ret <vscale x 2 x double> [[TMP2]]
//
// CPP-CHECK-LABEL: @_Z14test_svld1_f64u10__SVBool_tPKd(
@@ -224,7 +224,7 @@ svfloat64_t test_svld1_f64(svbool_t pg, const float64_t *base)
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
// CHECK-NEXT: [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <vscale x 16 x i8>*
-// CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* [[TMP2]], i32 1, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> zeroinitializer)
+// CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* [[TMP2]], i32 1, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> zeroinitializer), !tbaa ![[CHAR]]
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP3]]
//
// CPP-CHECK-LABEL: @_Z18test_svld1_vnum_s8u10__SVBool_tPKal(
@@ -246,7 +246,7 @@ svint8_t test_svld1_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum)
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <vscale x 8 x i16>*
-// CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>* [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> zeroinitializer)
+// CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>* [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> zeroinitializer), !tbaa ![[SHORT]]
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP4]]
//
// CPP-CHECK-LABEL: @_Z19test_svld1_vnum_s16u10__SVBool_tPKsl(
@@ -269,7 +269,7 @@ svint16_t test_svld1_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum)
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <vscale x 4 x i32>*
-// CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> zeroinitializer)
+// CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> zeroinitializer), !tbaa ![[INT]]
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP4]]
//
// CPP-CHECK-LABEL: @_Z19test_svld1_vnum_s32u10__SVBool_tPKil(
@@ -292,7 +292,7 @@ svint32_t test_svld1_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum)
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[TMP2]] to <vscale x 2 x i64>*
-// CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> zeroinitializer)
+// CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> zeroinitializer), !tbaa ![[LONG]]
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP4]]
//
// CPP-CHECK-LABEL: @_Z19test_svld1_vnum_s64u10__SVBool_tPKll(
@@ -314,7 +314,7 @@ svint64_t test_svld1_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum)
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
// CHECK-NEXT: [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <vscale x 16 x i8>*
-// CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* [[TMP2]], i32 1, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> zeroinitializer)
+// CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* [[TMP2]], i32 1, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> zeroinitializer), !tbaa ![[CHAR]]
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP3]]
//
// CPP-CHECK-LABEL: @_Z18test_svld1_vnum_u8u10__SVBool_tPKhl(
@@ -336,7 +336,7 @@ svuint8_t test_svld1_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum)
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <vscale x 8 x i16>*
-// CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>* [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> zeroinitializer)
+// CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>* [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> zeroinitializer), !tbaa ![[SHORT]]
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP4]]
//
// CPP-CHECK-LABEL: @_Z19test_svld1_vnum_u16u10__SVBool_tPKtl(
@@ -359,7 +359,7 @@ svuint16_t test_svld1_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum)
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <vscale x 4 x i32>*
-// CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> zeroinitializer)
+// CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> zeroinitializer), !tbaa ![[INT]]
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP4]]
//
// CPP-CHECK-LABEL: @_Z19test_svld1_vnum_u32u10__SVBool_tPKjl(
@@ -382,7 +382,7 @@ svuint32_t test_svld1_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum)
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[TMP2]] to <vscale x 2 x i64>*
-// CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> zeroinitializer)
+// CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> zeroinitializer), !tbaa ![[LONG]]
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP4]]
//
// CPP-CHECK-LABEL: @_Z19test_svld1_vnum_u64u10__SVBool_tPKml(
@@ -405,7 +405,7 @@ svuint64_t test_svld1_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum)
// CHECK-NEXT: [[TMP1:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 8 x half>*
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: [[TMP3:%.*]] = bitcast half* [[TMP2]] to <vscale x 8 x half>*
-// CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 8 x half> @llvm.masked.load.nxv8f16.p0nxv8f16(<vscale x 8 x half>* [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> zeroinitializer)
+// CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 8 x half> @llvm.masked.load.nxv8f16.p0nxv8f16(<vscale x 8 x half>* [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> zeroinitializer), !tbaa ![[HALF]]
// CHECK-NEXT: ret <vscale x 8 x half> [[TMP4]]
//
// CPP-CHECK-LABEL: @_Z19test_svld1_vnum_f16u10__SVBool_tPKDhl(
@@ -428,7 +428,7 @@ svfloat16_t test_svld1_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum
// CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 4 x float>*
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[TMP2]] to <vscale x 4 x float>*
-// CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> zeroinitializer)
+// CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> zeroinitializer), !tbaa ![[FLOAT]]
// CHECK-NEXT: ret <vscale x 4 x float> [[TMP4]]
//
// CPP-CHECK-LABEL: @_Z19test_svld1_vnum_f32u10__SVBool_tPKfl(
@@ -451,7 +451,7 @@ svfloat32_t test_svld1_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum
// CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 2 x double>*
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: [[TMP3:%.*]] = bitcast double* [[TMP2]] to <vscale x 2 x double>*
-// CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0nxv2f64(<vscale x 2 x double>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> zeroinitializer)
+// CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0nxv2f64(<vscale x 2 x double>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> zeroinitializer), !tbaa ![[DOUBLE]]
// CHECK-NEXT: ret <vscale x 2 x double> [[TMP4]]
//
// CPP-CHECK-LABEL: @_Z19test_svld1_vnum_f64u10__SVBool_tPKdl(
@@ -1151,3 +1151,18 @@ svfloat32_t test_svld1_gather_u32base_index_f32(svbool_t pg, svuint32_t bases, i
svfloat64_t test_svld1_gather_u64base_index_f64(svbool_t pg, svuint64_t bases, int64_t index) {
return SVE_ACLE_FUNC(svld1_gather, _u64base, _index_f64, )(pg, bases, index);
}
+
+// CHECK-DAG: ![[CHAR_TY:[0-9]+]] = !{!"omnipotent char", !8, i64 0}
+// CHECK-DAG: ![[CHAR]] = !{![[CHAR_TY]], ![[CHAR_TY]], i64 0}
+// CHECK-DAG: ![[SHORT_TY:[0-9]+]] = !{!"short", !{{.*}}, i64 0}
+// CHECK-DAG: ![[SHORT]] = !{![[SHORT_TY]], ![[SHORT_TY]], i64 0}
+// CHECK-DAG: ![[INT_TY:[0-9]+]] = !{!"int", !7, i64 0}
+// CHECK-DAG: ![[INT]] = !{![[INT_TY]], ![[INT_TY]], i64 0}
+// CHECK-DAG: ![[LONG_TY:[0-9]+]] = !{!"long", !7, i64 0}
+// CHECK-DAG: ![[LONG]] = !{![[LONG_TY]], ![[LONG_TY]], i64 0}
+// CHECK-DAG: ![[HALF_TY:[0-9]+]] = !{!"__fp16", !7, i64 0}
+// CHECK-DAG: ![[HALF]] = !{![[HALF_TY]], ![[HALF_TY]], i64 0}
+// CHECK-DAG: ![[FLOAT_TY:[0-9]+]] = !{!"float", !7, i64 0}
+// CHECK-DAG: ![[FLOAT]] = !{![[FLOAT_TY]], ![[FLOAT_TY]], i64 0}
+// CHECK-DAG: ![[DOUBLE_TY:[0-9]+]] = !{!"double", !7, i64 0}
+// CHECK-DAG: ![[DOUBLE]] = !{![[DOUBLE_TY]], ![[DOUBLE_TY]], i64 0}
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index e0a54ccb22dd..7429c1b920d2 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -862,12 +862,14 @@ instCombineSVELD1(InstCombiner &IC, IntrinsicInst &II, const DataLayout &DL) {
if (isAllActivePredicate(Pred)) {
LoadInst *Load = Builder.CreateLoad(VecTy, VecPtr);
+ Load->copyMetadata(II);
return IC.replaceInstUsesWith(II, Load);
}
CallInst *MaskedLoad =
Builder.CreateMaskedLoad(VecTy, VecPtr, PtrOp->getPointerAlignment(DL),
Pred, ConstantAggregateZero::get(VecTy));
+ MaskedLoad->copyMetadata(II);
return IC.replaceInstUsesWith(II, MaskedLoad);
}
@@ -883,12 +885,14 @@ instCombineSVEST1(InstCombiner &IC, IntrinsicInst &II, const DataLayout &DL) {
Builder.CreateBitCast(PtrOp, VecOp->getType()->getPointerTo());
if (isAllActivePredicate(Pred)) {
- Builder.CreateStore(VecOp, VecPtr);
+ StoreInst *Store = Builder.CreateStore(VecOp, VecPtr);
+ Store->copyMetadata(II);
return IC.eraseInstFromFunction(II);
}
- Builder.CreateMaskedStore(VecOp, VecPtr, PtrOp->getPointerAlignment(DL),
- Pred);
+ CallInst *MaskedStore = Builder.CreateMaskedStore(
+ VecOp, VecPtr, PtrOp->getPointerAlignment(DL), Pred);
+ MaskedStore->copyMetadata(II);
return IC.eraseInstFromFunction(II);
}
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-loadstore.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-loadstore.ll
index c035ea04a1f4..d944a770d288 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-loadstore.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-loadstore.ll
@@ -6,24 +6,24 @@ target triple = "aarch64-unknown-linux-gnu"
define <vscale x 4 x i32> @combine_ld1(i32* %ptr) #0 {
; CHECK-LABEL: @combine_ld1(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[PTR:%.*]] to <vscale x 4 x i32>*
-; CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], align 16
+; CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], align 16, !annotation !0
; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
- %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1> %1, i32* %ptr)
+ %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1> %1, i32* %ptr), !annotation !0
ret <vscale x 4 x i32> %2
}
define <vscale x 4 x i32> @combine_ld1_casted_predicate(i32* %ptr) #0 {
; CHECK-LABEL: @combine_ld1_casted_predicate(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[PTR:%.*]] to <vscale x 4 x i32>*
-; CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], align 16
+; CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], align 16, !annotation !0
; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
;
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %1)
%3 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %2)
- %4 = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1> %3, i32* %ptr)
+ %4 = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1> %3, i32* %ptr), !annotation !0
ret <vscale x 4 x i32> %4
}
@@ -31,11 +31,11 @@ define <vscale x 4 x i32> @combine_ld1_masked(i32* %ptr) #0 {
; CHECK-LABEL: @combine_ld1_masked(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 16)
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[PTR:%.*]] to <vscale x 4 x i32>*
-; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP2]], i32 1, <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP2]], i32 1, <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> zeroinitializer), !annotation !0
; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP3]]
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 16)
- %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1> %1, i32* %ptr)
+ %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1> %1, i32* %ptr), !annotation !0
ret <vscale x 4 x i32> %2
}
@@ -45,37 +45,37 @@ define <vscale x 8 x i16> @combine_ld1_masked_casted_predicate(i16* %ptr) #0 {
; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[TMP2]])
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[PTR:%.*]] to <vscale x 8 x i16>*
-; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>* [[TMP4]], i32 1, <vscale x 8 x i1> [[TMP3]], <vscale x 8 x i16> zeroinitializer)
+; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>* [[TMP4]], i32 1, <vscale x 8 x i1> [[TMP3]], <vscale x 8 x i16> zeroinitializer), !annotation !0
; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP5]]
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %1)
%3 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %2)
- %4 = call <vscale x 8 x i16> @llvm.aarch64.sve.ld1.nxv8i16(<vscale x 8 x i1> %3, i16* %ptr)
+ %4 = call <vscale x 8 x i16> @llvm.aarch64.sve.ld1.nxv8i16(<vscale x 8 x i1> %3, i16* %ptr), !annotation !0
ret <vscale x 8 x i16> %4
}
define void @combine_st1(<vscale x 4 x i32> %vec, i32* %ptr) #0 {
; CHECK-LABEL: @combine_st1(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[PTR:%.*]] to <vscale x 4 x i32>*
-; CHECK-NEXT: store <vscale x 4 x i32> [[VEC:%.*]], <vscale x 4 x i32>* [[TMP1]], align 16
+; CHECK-NEXT: store <vscale x 4 x i32> [[VEC:%.*]], <vscale x 4 x i32>* [[TMP1]], align 16, !annotation !0
; CHECK-NEXT: ret void
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
- call void @llvm.aarch64.sve.st1.nxv4i32(<vscale x 4 x i32> %vec, <vscale x 4 x i1> %1, i32* %ptr)
+ call void @llvm.aarch64.sve.st1.nxv4i32(<vscale x 4 x i32> %vec, <vscale x 4 x i1> %1, i32* %ptr), !annotation !0
ret void
}
define void @combine_st1_casted_predicate(<vscale x 4 x i32> %vec, i32* %ptr) #0 {
; CHECK-LABEL: @combine_st1_casted_predicate(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[PTR:%.*]] to <vscale x 4 x i32>*
-; CHECK-NEXT: store <vscale x 4 x i32> [[VEC:%.*]], <vscale x 4 x i32>* [[TMP1]], align 16
+; CHECK-NEXT: store <vscale x 4 x i32> [[VEC:%.*]], <vscale x 4 x i32>* [[TMP1]], align 16, !annotation !0
; CHECK-NEXT: ret void
;
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %1)
%3 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %2)
- call void @llvm.aarch64.sve.st1.nxv4i32(<vscale x 4 x i32> %vec, <vscale x 4 x i1> %3, i32* %ptr)
+ call void @llvm.aarch64.sve.st1.nxv4i32(<vscale x 4 x i32> %vec, <vscale x 4 x i1> %3, i32* %ptr), !annotation !0
ret void
}
@@ -83,11 +83,11 @@ define void @combine_st1_masked(<vscale x 4 x i32> %vec, i32* %ptr) #0 {
; CHECK-LABEL: @combine_st1_masked(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 16)
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[PTR:%.*]] to <vscale x 4 x i32>*
-; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> [[VEC:%.*]], <vscale x 4 x i32>* [[TMP2]], i32 1, <vscale x 4 x i1> [[TMP1]])
+; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> [[VEC:%.*]], <vscale x 4 x i32>* [[TMP2]], i32 1, <vscale x 4 x i1> [[TMP1]]), !annotation !0
; CHECK-NEXT: ret void
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 16)
- call void @llvm.aarch64.sve.st1.nxv4i32(<vscale x 4 x i32> %vec, <vscale x 4 x i1> %1, i32* %ptr)
+ call void @llvm.aarch64.sve.st1.nxv4i32(<vscale x 4 x i32> %vec, <vscale x 4 x i1> %1, i32* %ptr), !annotation !0
ret void
}
@@ -97,13 +97,13 @@ define void @combine_st1_masked_casted_predicate(<vscale x 8 x i16> %vec, i16* %
; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[TMP2]])
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[PTR:%.*]] to <vscale x 8 x i16>*
-; CHECK-NEXT: call void @llvm.masked.store.nxv8i16.p0nxv8i16(<vscale x 8 x i16> [[VEC:%.*]], <vscale x 8 x i16>* [[TMP4]], i32 1, <vscale x 8 x i1> [[TMP3]])
+; CHECK-NEXT: call void @llvm.masked.store.nxv8i16.p0nxv8i16(<vscale x 8 x i16> [[VEC:%.*]], <vscale x 8 x i16>* [[TMP4]], i32 1, <vscale x 8 x i1> [[TMP3]]), !annotation !0
; CHECK-NEXT: ret void
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %1)
%3 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %2)
- call void @llvm.aarch64.sve.st1.nxv8i16(<vscale x 8 x i16> %vec, <vscale x 8 x i1> %3, i16* %ptr)
+ call void @llvm.aarch64.sve.st1.nxv8i16(<vscale x 8 x i16> %vec, <vscale x 8 x i1> %3, i16* %ptr), !annotation !0
ret void
}
@@ -124,3 +124,5 @@ declare void @llvm.aarch64.sve.st1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>
declare void @llvm.aarch64.sve.st1.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i16*)
attributes #0 = { "target-features"="+sve" }
+
+!0 = !{!"some metadata"}
More information about the cfe-commits
mailing list