[llvm] [msan] Convert target("aarch64.svcount") from compile-time crash to MSan false negatives (PR #165028)
Thurston Dang via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 24 12:15:26 PDT 2025
https://github.com/thurstond created https://github.com/llvm/llvm-project/pull/165028
MSan currently crashes at compile time when it encounters target("aarch64.svcount") (e.g., https://github.com/llvm/llvm-project/pull/164315). This patch duct-tapes MSan so that it no longer crashes at compile time; instead, it propagates a clean shadow (resulting in false negatives but not false positives).
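For reference, a minimal reproducer (from the sme-aarch64-svcount-mini.ll test updated below) is a function that simply returns one of its predicate-as-counter arguments; instrumenting it with -passes=msan previously crashed the compiler:

define target("aarch64.svcount") @test_return_arg1(target("aarch64.svcount") %arg0, target("aarch64.svcount") %arg1) nounwind {
  ret target("aarch64.svcount") %arg1
}

With this patch, MSan stores a clean shadow for the return value (store target("aarch64.svcount") zeroinitializer, ptr @__msan_retval_tls) rather than attempting to compute one.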
From 0ba8aa1a9de0fe4cf0eeb5436b61d2cd51ebd31d Mon Sep 17 00:00:00 2001
From: Thurston Dang <thurston at google.com>
Date: Fri, 24 Oct 2025 19:09:40 +0000
Subject: [PATCH] [msan] Convert target("aarch64.svcount") from clang crash to
MSan false negative
MSan currently crashes at compile time when it encounters target("aarch64.svcount") (e.g., https://github.com/llvm/llvm-project/pull/164315). This patch duct-tapes MSan so that it no longer crashes at compile time; instead, it propagates a clean shadow (resulting in false negatives but not false positives).
---
.../Instrumentation/MemorySanitizer.cpp | 44 +-
.../AArch64/sme-aarch64-svcount-mini.ll | 10 +-
.../AArch64/sme-aarch64-svcount.ll | 83 ++-
.../AArch64/sme2-intrinsics-add-mini.ll | 17 +-
.../AArch64/sme2-intrinsics-add.ll | 485 +++++++++++++++++-
5 files changed, 625 insertions(+), 14 deletions(-)
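To illustrate the shadow propagation, the updated test_sel checks below show how a select over target("aarch64.svcount") values is handled. Because the usual XOR-based formula cannot be applied to this type, the "poisoned" arm of the shadow select is replaced with a clean shadow:

; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[CMP:%.*]], target("aarch64.svcount") zeroinitializer, target("aarch64.svcount") zeroinitializer
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[TMP1]], target("aarch64.svcount") zeroinitializer, target("aarch64.svcount") [[TMP2]]

A select with a poisoned condition therefore produces a clean result shadow (a false negative) instead of a report, while an initialized condition can never produce a spurious report.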
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index b6cbecb6133f4..5949c2baf586f 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -226,6 +226,7 @@ static const Align kMinOriginAlignment = Align(4);
static const Align kShadowTLSAlignment = Align(8);
// These constants must be kept in sync with the ones in msan.h.
+// TODO: increase size to match SVE/SVE2/SME/SME2 limits
static const unsigned kParamTLSSize = 800;
static const unsigned kRetvalTLSSize = 800;
@@ -1544,6 +1545,27 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
}
}
+ static bool isAArch64SVCount(Type *Ty) {
+ if (!isa<TargetExtType>(Ty))
+ return false;
+
+ TargetExtType *TTy = cast<TargetExtType>(Ty);
+ return TTy->getName() == "aarch64.svcount";
+ }
+
+ // This is intended to match the "AArch64 Predicate-as-Counter Type" (aka
+ // 'target("aarch64.svcount")'), but not e.g., <vscale x 4 x i32>.
+ static bool isScalableNonVectorType(Type *Ty) {
+ if (!isAArch64SVCount(Ty))
+ LLVM_DEBUG(dbgs() << "isScalableNonVectorType: Unexpected type " << *Ty << "\n");
+
+ return Ty->isScalableTy() && !isa<VectorType>(Ty);
+ }
+
+ static bool isScalableNonVectorType(Instruction *I) {
+ return isScalableNonVectorType(I->getType());
+ }
+
void materializeChecks() {
#ifndef NDEBUG
// For assert below.
@@ -1672,6 +1694,11 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
LLVM_DEBUG(dbgs() << "getShadowTy: " << *ST << " ===> " << *Res << "\n");
return Res;
}
+ if (isScalableNonVectorType(OrigTy)) {
+ LLVM_DEBUG(dbgs() << "getShadowTy: Scalable non-vector type: " << *OrigTy << "\n");
+ return OrigTy;
+ }
+
uint32_t TypeSize = DL.getTypeSizeInBits(OrigTy);
return IntegerType::get(*MS.C, TypeSize);
}
@@ -2185,8 +2212,14 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
<< *OrigIns << "\n");
return;
}
-#ifndef NDEBUG
+
Type *ShadowTy = Shadow->getType();
+ if (isScalableNonVectorType(ShadowTy)) {
+ LLVM_DEBUG(dbgs() << "Skipping check of scalable non-vector " << *Shadow
+ << " before " << *OrigIns << "\n");
+ return;
+ }
+#ifndef NDEBUG
assert((isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy) ||
isa<StructType>(ShadowTy) || isa<ArrayType>(ShadowTy)) &&
"Can only insert checks for integer, vector, and aggregate shadow "
@@ -6972,6 +7005,15 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// an extra "select". This results in much more compact IR.
// Sa = select Sb, poisoned, (select b, Sc, Sd)
Sa1 = getPoisonedShadow(getShadowTy(I.getType()));
+ } else if (isScalableNonVectorType(&I)) {
+ // This is intended to handle target("aarch64.svcount"), which can't be
+ // handled in the else branch because of incompatibility with CreateXor
+ // ("The supported LLVM operations on this type are limited to load,
+ // store, phi, select and alloca instructions").
+
+ // TODO: this currently underapproximates. To handle this type precisely,
+ // the else branch would need to use Arm SVE EOR instead of CreateXor.
+ Sa1 = getCleanShadow(getShadowTy(I.getType()));
} else {
// Sa = select Sb, [ (c^d) | Sc | Sd ], [ b ? Sc : Sd ]
// If Sb (condition is poisoned), look for bits in c and d that are equal
diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount-mini.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount-mini.ll
index 1c869bd41b931..e7491e985fa26 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount-mini.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount-mini.ll
@@ -1,14 +1,16 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -passes=msan -mattr=+sme -o - %s
-
-; XFAIL: *
+; RUN: opt -S -passes=msan -mattr=+sme -o - %s | FileCheck %s
; Forked from llvm/test/CodeGen/AArch64/sme-aarch64-svcount.ll
-; Manually minimized to show MSan leads to a compiler crash
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-android9001"
define target("aarch64.svcount") @test_return_arg1(target("aarch64.svcount") %arg0, target("aarch64.svcount") %arg1) nounwind {
+; CHECK-LABEL: @test_return_arg1(
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: store target("aarch64.svcount") zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret target("aarch64.svcount") [[ARG1:%.*]]
+;
ret target("aarch64.svcount") %arg1
}
diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount.ll
index 00cf3204464d0..e1ea9e68aefc3 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount.ll
@@ -1,7 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -passes=msan -mattr=+sme -o - %s
-
-; XFAIL: *
+; RUN: opt -S -passes=msan -mattr=+sme -o - %s | FileCheck %s
; Forked from llvm/test/CodeGen/AArch64/sme-aarch64-svcount.ll
@@ -12,16 +10,49 @@ target triple = "aarch64--linux-android9001"
; Test simple loads, stores and return.
;
define target("aarch64.svcount") @test_load(ptr %ptr) nounwind {
+; CHECK-LABEL: @test_load(
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[RES:%.*]] = load target("aarch64.svcount"), ptr [[PTR:%.*]], align 2
+; CHECK-NEXT: store target("aarch64.svcount") zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret target("aarch64.svcount") [[RES]]
+;
%res = load target("aarch64.svcount"), ptr %ptr
ret target("aarch64.svcount") %res
}
define void @test_store(ptr %ptr, target("aarch64.svcount") %val) nounwind {
+; CHECK-LABEL: @test_store(
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[PTR:%.*]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: store target("aarch64.svcount") zeroinitializer, ptr [[TMP3]], align 2
+; CHECK-NEXT: store target("aarch64.svcount") [[VAL:%.*]], ptr [[PTR]], align 2
+; CHECK-NEXT: ret void
+;
store target("aarch64.svcount") %val, ptr %ptr
ret void
}
define target("aarch64.svcount") @test_alloca_store_reload(target("aarch64.svcount") %val) nounwind {
+; CHECK-LABEL: @test_alloca_store_reload(
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[PTR:%.*]] = alloca target("aarch64.svcount"), align 1
+; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[PTR]] to i64
+; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576
+; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[TMP5]], i8 0, i64 [[TMP2]], i1 false)
+; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[PTR]] to i64
+; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 193514046488576
+; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
+; CHECK-NEXT: store target("aarch64.svcount") zeroinitializer, ptr [[TMP8]], align 2
+; CHECK-NEXT: store target("aarch64.svcount") [[VAL:%.*]], ptr [[PTR]], align 2
+; CHECK-NEXT: [[RES:%.*]] = load target("aarch64.svcount"), ptr [[PTR]], align 2
+; CHECK-NEXT: store target("aarch64.svcount") zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret target("aarch64.svcount") [[RES]]
+;
%ptr = alloca target("aarch64.svcount"), align 1
store target("aarch64.svcount") %val, ptr %ptr
%res = load target("aarch64.svcount"), ptr %ptr
@@ -33,10 +64,20 @@ define target("aarch64.svcount") @test_alloca_store_reload(target("aarch64.svcou
;
define target("aarch64.svcount") @test_return_arg1(target("aarch64.svcount") %arg0, target("aarch64.svcount") %arg1) nounwind {
+; CHECK-LABEL: @test_return_arg1(
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: store target("aarch64.svcount") zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret target("aarch64.svcount") [[ARG1:%.*]]
+;
ret target("aarch64.svcount") %arg1
}
define target("aarch64.svcount") @test_return_arg4(target("aarch64.svcount") %arg0, target("aarch64.svcount") %arg1, target("aarch64.svcount") %arg2, target("aarch64.svcount") %arg3, target("aarch64.svcount") %arg4) nounwind {
+; CHECK-LABEL: @test_return_arg4(
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: store target("aarch64.svcount") zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret target("aarch64.svcount") [[ARG4:%.*]]
+;
ret target("aarch64.svcount") %arg4
}
@@ -46,22 +87,58 @@ define target("aarch64.svcount") @test_return_arg4(target("aarch64.svcount") %ar
declare void @take_svcount_1(target("aarch64.svcount") %arg)
define void @test_pass_1arg(target("aarch64.svcount") %arg) nounwind {
+; CHECK-LABEL: @test_pass_1arg(
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: call void @take_svcount_1(target("aarch64.svcount") [[ARG:%.*]])
+; CHECK-NEXT: ret void
+;
call void @take_svcount_1(target("aarch64.svcount") %arg)
ret void
}
declare void @take_svcount_5(target("aarch64.svcount") %arg0, target("aarch64.svcount") %arg1, target("aarch64.svcount") %arg2, target("aarch64.svcount") %arg3, target("aarch64.svcount") %arg4)
define void @test_pass_5args(target("aarch64.svcount") %arg) nounwind {
+; CHECK-LABEL: @test_pass_5args(
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: call void @take_svcount_5(target("aarch64.svcount") [[ARG:%.*]], target("aarch64.svcount") [[ARG]], target("aarch64.svcount") [[ARG]], target("aarch64.svcount") [[ARG]], target("aarch64.svcount") [[ARG]])
+; CHECK-NEXT: ret void
+;
call void @take_svcount_5(target("aarch64.svcount") %arg, target("aarch64.svcount") %arg, target("aarch64.svcount") %arg, target("aarch64.svcount") %arg, target("aarch64.svcount") %arg)
ret void
}
define target("aarch64.svcount") @test_sel(target("aarch64.svcount") %x, target("aarch64.svcount") %y, i1 %cmp) sanitize_memory {
+; CHECK-LABEL: @test_sel(
+; CHECK-NEXT: [[TMP1:%.*]] = load i1, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[CMP:%.*]], target("aarch64.svcount") zeroinitializer, target("aarch64.svcount") zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[TMP1]], target("aarch64.svcount") zeroinitializer, target("aarch64.svcount") [[TMP2]]
+; CHECK-NEXT: [[X_Y:%.*]] = select i1 [[CMP]], target("aarch64.svcount") [[X:%.*]], target("aarch64.svcount") [[Y:%.*]]
+; CHECK-NEXT: store target("aarch64.svcount") [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret target("aarch64.svcount") [[X_Y]]
+;
%x.y = select i1 %cmp, target("aarch64.svcount") %x, target("aarch64.svcount") %y
ret target("aarch64.svcount") %x.y
}
define target("aarch64.svcount") @test_sel_cc(target("aarch64.svcount") %x, target("aarch64.svcount") %y, i32 %k) sanitize_memory {
+; CHECK-LABEL: @test_sel_cc(
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[K:%.*]], -2147483648
+; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1
+; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP2]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i32 [[TMP4]], -2147483606
+; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP5]], -2147483606
+; CHECK-NEXT: [[TMP8:%.*]] = xor i1 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[K]], 42
+; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[CMP]], target("aarch64.svcount") zeroinitializer, target("aarch64.svcount") zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[TMP8]], target("aarch64.svcount") zeroinitializer, target("aarch64.svcount") [[TMP9]]
+; CHECK-NEXT: [[X_Y:%.*]] = select i1 [[CMP]], target("aarch64.svcount") [[X:%.*]], target("aarch64.svcount") [[Y:%.*]]
+; CHECK-NEXT: store target("aarch64.svcount") [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret target("aarch64.svcount") [[X_Y]]
+;
%cmp = icmp sgt i32 %k, 42
%x.y = select i1 %cmp, target("aarch64.svcount") %x, target("aarch64.svcount") %y
ret target("aarch64.svcount") %x.y
diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add-mini.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add-mini.ll
index 3f43efa233621..3ae73c5719c3a 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add-mini.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add-mini.ll
@@ -1,7 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -passes=msan -mattr=+sme2 -mattr=+sme-i16i64 -mattr=+sme-f64f64 -o - %s
-
-; XFAIL: *
+; RUN: opt -S -passes=msan -mattr=+sme2 -mattr=+sme-i16i64 -mattr=+sme-f64f64 -o - %s | FileCheck %s
; Forked from llvm/test/CodeGen/AArch64/sme2-intrinsics-add.ll
; Manually reduced to show MSan leads to a compiler crash
@@ -10,6 +8,19 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-android9001"
define void @multi_vector_add_za_vg1x4_f32_tuple(i64 %stride, ptr %ptr) sanitize_memory {
+; CHECK-LABEL: @multi_vector_add_za_vg1x4_f32_tuple(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr getelementptr (i8, ptr @__msan_param_tls, i64 8), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP2:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1:![0-9]+]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5:[0-9]+]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP5:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld1.pn.x4.nxv4f32(target("aarch64.svcount") [[TMP2]], ptr [[PTR:%.*]])
+; CHECK-NEXT: ret void
+;
%1 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8()
%2 = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld1.pn.x4.nxv4f32(target("aarch64.svcount") %1, ptr %ptr)
ret void
diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add.ll
index cd04373c11d20..8d00b930abf95 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add.ll
@@ -1,7 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -passes=msan -mattr=+sme2 -mattr=+sme-i16i64 -mattr=+sme-f64f64 -o - %s
-
-; XFAIL: *
+; RUN: opt -S -passes=msan -mattr=+sme2 -mattr=+sme-i16i64 -mattr=+sme-f64f64 -o - %s | FileCheck %s
; Forked from llvm/test/CodeGen/AArch64/sme2-intrinsics-add.ll
@@ -9,6 +7,27 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-android9001"
define void @multi_vector_add_write_single_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zm) sanitize_memory {
+; CHECK-LABEL: @multi_vector_add_write_single_za_vg1x2_i32(
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1:![0-9]+]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7:[0-9]+]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv4i32(i32 [[SLICE:%.*]], <vscale x 4 x i32> [[ZN0:%.*]], <vscale x 4 x i32> [[ZN1:%.*]], <vscale x 4 x i32> [[ZM:%.*]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = or i32 [[TMP1]], 0
+; CHECK-NEXT: [[SLICE_7:%.*]] = add i32 [[SLICE]], 7
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK: 4:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT: unreachable
+; CHECK: 5:
+; CHECK-NEXT: call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv4i32(i32 [[SLICE_7]], <vscale x 4 x i32> [[ZN0]], <vscale x 4 x i32> [[ZN1]], <vscale x 4 x i32> [[ZM]])
+; CHECK-NEXT: ret void
+;
call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv4i32(i32 %slice,
<vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
<vscale x 4 x i32> %zm)
@@ -20,6 +39,27 @@ define void @multi_vector_add_write_single_za_vg1x2_i32(i32 %slice, <vscale x 4
}
define void @multi_vector_add_write_single_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zm) sanitize_memory {
+; CHECK-LABEL: @multi_vector_add_write_single_za_vg1x2_i64(
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv2i64(i32 [[SLICE:%.*]], <vscale x 2 x i64> [[ZN0:%.*]], <vscale x 2 x i64> [[ZN1:%.*]], <vscale x 2 x i64> [[ZM:%.*]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = or i32 [[TMP1]], 0
+; CHECK-NEXT: [[SLICE_7:%.*]] = add i32 [[SLICE]], 7
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK: 4:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT: unreachable
+; CHECK: 5:
+; CHECK-NEXT: call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv2i64(i32 [[SLICE_7]], <vscale x 2 x i64> [[ZN0]], <vscale x 2 x i64> [[ZN1]], <vscale x 2 x i64> [[ZM]])
+; CHECK-NEXT: ret void
+;
call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv2i64(i32 %slice,
<vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,
<vscale x 2 x i64> %zm)
@@ -32,6 +72,27 @@ define void @multi_vector_add_write_single_za_vg1x2_i64(i32 %slice, <vscale x 2
define void @multi_vector_add_write_single_za_vg1x4_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
+; CHECK-LABEL: @multi_vector_add_write_single_za_vg1x4_i32(
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv4i32(i32 [[SLICE:%.*]], <vscale x 4 x i32> [[ZN0:%.*]], <vscale x 4 x i32> [[ZN1:%.*]], <vscale x 4 x i32> [[ZN2:%.*]], <vscale x 4 x i32> [[ZN3:%.*]], <vscale x 4 x i32> [[ZM:%.*]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = or i32 [[TMP1]], 0
+; CHECK-NEXT: [[SLICE_7:%.*]] = add i32 [[SLICE]], 7
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK: 4:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT: unreachable
+; CHECK: 5:
+; CHECK-NEXT: call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv4i32(i32 [[SLICE_7]], <vscale x 4 x i32> [[ZN0]], <vscale x 4 x i32> [[ZN1]], <vscale x 4 x i32> [[ZN2]], <vscale x 4 x i32> [[ZN3]], <vscale x 4 x i32> [[ZM]])
+; CHECK-NEXT: ret void
+;
<vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3,
<vscale x 4 x i32> %zm) sanitize_memory {
call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv4i32(i32 %slice,
@@ -47,6 +108,27 @@ define void @multi_vector_add_write_single_za_vg1x4_i32(i32 %slice, <vscale x 4
}
define void @multi_vector_add_write_single_za_vg1x4_i64(i32 %slice,
+; CHECK-LABEL: @multi_vector_add_write_single_za_vg1x4_i64(
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv2i64(i32 [[SLICE:%.*]], <vscale x 2 x i64> [[ZN0:%.*]], <vscale x 2 x i64> [[ZN1:%.*]], <vscale x 2 x i64> [[ZN2:%.*]], <vscale x 2 x i64> [[ZN3:%.*]], <vscale x 2 x i64> [[ZM:%.*]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = or i32 [[TMP1]], 0
+; CHECK-NEXT: [[SLICE_7:%.*]] = add i32 [[SLICE]], 7
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK: 4:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT: unreachable
+; CHECK: 5:
+; CHECK-NEXT: call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv2i64(i32 [[SLICE_7]], <vscale x 2 x i64> [[ZN0]], <vscale x 2 x i64> [[ZN1]], <vscale x 2 x i64> [[ZN2]], <vscale x 2 x i64> [[ZN3]], <vscale x 2 x i64> [[ZM]])
+; CHECK-NEXT: ret void
+;
<vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,
<vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3,
<vscale x 2 x i64> %zm) sanitize_memory {
@@ -64,6 +146,27 @@ define void @multi_vector_add_write_single_za_vg1x4_i64(i32 %slice,
define void @multi_vector_add_write_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
+; CHECK-LABEL: @multi_vector_add_write_za_vg1x2_i32(
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv4i32(i32 [[SLICE:%.*]], <vscale x 4 x i32> [[ZN0:%.*]], <vscale x 4 x i32> [[ZN1:%.*]], <vscale x 4 x i32> [[ZM1:%.*]], <vscale x 4 x i32> [[ZM2:%.*]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = or i32 [[TMP1]], 0
+; CHECK-NEXT: [[SLICE_7:%.*]] = add i32 [[SLICE]], 7
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK: 4:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT: unreachable
+; CHECK: 5:
+; CHECK-NEXT: call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv4i32(i32 [[SLICE_7]], <vscale x 4 x i32> [[ZN0]], <vscale x 4 x i32> [[ZN1]], <vscale x 4 x i32> [[ZM1]], <vscale x 4 x i32> [[ZM2]])
+; CHECK-NEXT: ret void
+;
<vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2) sanitize_memory {
call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv4i32(i32 %slice,
<vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
@@ -77,6 +180,27 @@ define void @multi_vector_add_write_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32>
define void @multi_vector_add_write_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,
+; CHECK-LABEL: @multi_vector_add_write_za_vg1x2_i64(
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv2i64(i32 [[SLICE:%.*]], <vscale x 2 x i64> [[ZN0:%.*]], <vscale x 2 x i64> [[ZN1:%.*]], <vscale x 2 x i64> [[ZM1:%.*]], <vscale x 2 x i64> [[ZM2:%.*]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = or i32 [[TMP1]], 0
+; CHECK-NEXT: [[SLICE_7:%.*]] = add i32 [[SLICE]], 7
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK: 4:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT: unreachable
+; CHECK: 5:
+; CHECK-NEXT: call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv2i64(i32 [[SLICE_7]], <vscale x 2 x i64> [[ZN0]], <vscale x 2 x i64> [[ZN1]], <vscale x 2 x i64> [[ZM1]], <vscale x 2 x i64> [[ZM2]])
+; CHECK-NEXT: ret void
+;
<vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2) sanitize_memory {
call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv2i64(i32 %slice,
<vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,
@@ -91,6 +215,27 @@ define void @multi_vector_add_write_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64>
define void @multi_vector_add_write_za_vg1x4_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
+; CHECK-LABEL: @multi_vector_add_write_za_vg1x4_i32(
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv4i32(i32 [[SLICE:%.*]], <vscale x 4 x i32> [[ZN0:%.*]], <vscale x 4 x i32> [[ZN1:%.*]], <vscale x 4 x i32> [[ZN2:%.*]], <vscale x 4 x i32> [[ZN3:%.*]], <vscale x 4 x i32> [[ZM0:%.*]], <vscale x 4 x i32> [[ZM1:%.*]], <vscale x 4 x i32> [[ZM2:%.*]], <vscale x 4 x i32> [[ZM3:%.*]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = or i32 [[TMP1]], 0
+; CHECK-NEXT: [[SLICE_7:%.*]] = add i32 [[SLICE]], 7
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK: 4:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT: unreachable
+; CHECK: 5:
+; CHECK-NEXT: call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv4i32(i32 [[SLICE_7]], <vscale x 4 x i32> [[ZN0]], <vscale x 4 x i32> [[ZN1]], <vscale x 4 x i32> [[ZN2]], <vscale x 4 x i32> [[ZN3]], <vscale x 4 x i32> [[ZM0]], <vscale x 4 x i32> [[ZM1]], <vscale x 4 x i32> [[ZM2]], <vscale x 4 x i32> [[ZM3]])
+; CHECK-NEXT: ret void
+;
<vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3,
<vscale x 4 x i32> %zm0, <vscale x 4 x i32> %zm1,
<vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3) sanitize_memory {
@@ -109,6 +254,27 @@ define void @multi_vector_add_write_za_vg1x4_i32(i32 %slice, <vscale x 4 x i32>
}
define void @multi_vector_add_write_za_vg1x4_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,
+; CHECK-LABEL: @multi_vector_add_write_za_vg1x4_i64(
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv2i64(i32 [[SLICE:%.*]], <vscale x 2 x i64> [[ZN0:%.*]], <vscale x 2 x i64> [[ZN1:%.*]], <vscale x 2 x i64> [[ZN2:%.*]], <vscale x 2 x i64> [[ZN3:%.*]], <vscale x 2 x i64> [[ZM0:%.*]], <vscale x 2 x i64> [[ZM1:%.*]], <vscale x 2 x i64> [[ZM2:%.*]], <vscale x 2 x i64> [[ZM3:%.*]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = or i32 [[TMP1]], 0
+; CHECK-NEXT: [[SLICE_7:%.*]] = add i32 [[SLICE]], 7
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK: 4:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT: unreachable
+; CHECK: 5:
+; CHECK-NEXT: call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv2i64(i32 [[SLICE_7]], <vscale x 2 x i64> [[ZN0]], <vscale x 2 x i64> [[ZN1]], <vscale x 2 x i64> [[ZN2]], <vscale x 2 x i64> [[ZN3]], <vscale x 2 x i64> [[ZM0]], <vscale x 2 x i64> [[ZM1]], <vscale x 2 x i64> [[ZM2]], <vscale x 2 x i64> [[ZM3]])
+; CHECK-NEXT: ret void
+;
<vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3,
<vscale x 2 x i64> %zm0, <vscale x 2 x i64> %zm1,
<vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3) sanitize_memory {
@@ -127,6 +293,27 @@ define void @multi_vector_add_write_za_vg1x4_i64(i32 %slice, <vscale x 2 x i64>
}
define void @multi_vector_add_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1) sanitize_memory {
+; CHECK-LABEL: @multi_vector_add_za_vg1x2_i32(
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4i32(i32 [[SLICE:%.*]], <vscale x 4 x i32> [[ZN0:%.*]], <vscale x 4 x i32> [[ZN1:%.*]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = or i32 [[TMP1]], 0
+; CHECK-NEXT: [[SLICE_7:%.*]] = add i32 [[SLICE]], 7
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK: 4:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT: unreachable
+; CHECK: 5:
+; CHECK-NEXT: call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4i32(i32 [[SLICE_7]], <vscale x 4 x i32> [[ZN0]], <vscale x 4 x i32> [[ZN1]])
+; CHECK-NEXT: ret void
+;
call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4i32(i32 %slice,<vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1)
%slice.7 = add i32 %slice, 7
call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4i32(i32 %slice.7, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1)
@@ -134,6 +321,27 @@ define void @multi_vector_add_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32> %zn0,
}
define void @multi_vector_add_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1) sanitize_memory {
+; CHECK-LABEL: @multi_vector_add_za_vg1x2_i64(
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2i64(i32 [[SLICE:%.*]], <vscale x 2 x i64> [[ZN0:%.*]], <vscale x 2 x i64> [[ZN1:%.*]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = or i32 [[TMP1]], 0
+; CHECK-NEXT: [[SLICE_7:%.*]] = add i32 [[SLICE]], 7
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK: 4:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT: unreachable
+; CHECK: 5:
+; CHECK-NEXT: call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2i64(i32 [[SLICE_7]], <vscale x 2 x i64> [[ZN0]], <vscale x 2 x i64> [[ZN1]])
+; CHECK-NEXT: ret void
+;
call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1)
%slice.7 = add i32 %slice, 7
call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2i64(i32 %slice.7, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1)
@@ -141,6 +349,27 @@ define void @multi_vector_add_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64> %zn0,
}
define void @multi_vector_add_za_vg1x2_f32(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1) sanitize_memory {
+; CHECK-LABEL: @multi_vector_add_za_vg1x2_f32(
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4f32(i32 [[SLICE:%.*]], <vscale x 4 x float> [[ZN0:%.*]], <vscale x 4 x float> [[ZN1:%.*]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = or i32 [[TMP1]], 0
+; CHECK-NEXT: [[SLICE_7:%.*]] = add i32 [[SLICE]], 7
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK: 4:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT: unreachable
+; CHECK: 5:
+; CHECK-NEXT: call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4f32(i32 [[SLICE_7]], <vscale x 4 x float> [[ZN0]], <vscale x 4 x float> [[ZN1]])
+; CHECK-NEXT: ret void
+;
call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4f32(i32 %slice,
<vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1)
%slice.7 = add i32 %slice, 7
@@ -150,6 +379,27 @@ define void @multi_vector_add_za_vg1x2_f32(i32 %slice, <vscale x 4 x float> %zn0
}
define void @multi_vector_add_za_vg1x2_f64(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1) sanitize_memory {
+; CHECK-LABEL: @multi_vector_add_za_vg1x2_f64(
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2f64(i32 [[SLICE:%.*]], <vscale x 2 x double> [[ZN0:%.*]], <vscale x 2 x double> [[ZN1:%.*]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = or i32 [[TMP1]], 0
+; CHECK-NEXT: [[SLICE_7:%.*]] = add i32 [[SLICE]], 7
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK: 4:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT: unreachable
+; CHECK: 5:
+; CHECK-NEXT: call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2f64(i32 [[SLICE_7]], <vscale x 2 x double> [[ZN0]], <vscale x 2 x double> [[ZN1]])
+; CHECK-NEXT: ret void
+;
call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2f64(i32 %slice,
<vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1)
%slice.7 = add i32 %slice, 7
@@ -159,6 +409,36 @@ define void @multi_vector_add_za_vg1x2_f64(i32 %slice, <vscale x 2 x double> %zn
}
define void @multi_vector_add_za_vg1x2_f64_tuple(i64 %stride, ptr %ptr) sanitize_memory {
+; CHECK-LABEL: @multi_vector_add_za_vg1x2_f64_tuple(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr getelementptr (i8, ptr @__msan_param_tls, i64 8), align 8
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP2:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP0]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP5:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld1.pn.x2.nxv2f64(target("aarch64.svcount") [[TMP2]], ptr [[PTR:%.*]])
+; CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP5]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP5]], 1
+; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[STRIDE:%.*]]
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[_MSPROP]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP10:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld1.pn.x2.nxv2f64(target("aarch64.svcount") [[TMP2]], ptr [[ARRAYIDX2]])
+; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP10]], 0
+; CHECK-NEXT: [[TMP12:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP10]], 1
+; CHECK-NEXT: call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2f64(i32 0, <vscale x 2 x double> [[TMP6]], <vscale x 2 x double> [[TMP11]])
+; CHECK-NEXT: call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2f64(i32 0, <vscale x 2 x double> [[TMP7]], <vscale x 2 x double> [[TMP12]])
+; CHECK-NEXT: ret void
+;
entry:
%0 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8()
%1 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld1.pn.x2.nxv2f64(target("aarch64.svcount") %0, ptr %ptr)
@@ -175,6 +455,27 @@ entry:
define void @multi_vector_add_za_vg1x4_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3) sanitize_memory {
+; CHECK-LABEL: @multi_vector_add_za_vg1x4_i32(
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4i32(i32 [[SLICE:%.*]], <vscale x 4 x i32> [[ZN0:%.*]], <vscale x 4 x i32> [[ZN1:%.*]], <vscale x 4 x i32> [[ZN2:%.*]], <vscale x 4 x i32> [[ZN3:%.*]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = or i32 [[TMP1]], 0
+; CHECK-NEXT: [[SLICE_7:%.*]] = add i32 [[SLICE]], 7
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK: 4:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT: unreachable
+; CHECK: 5:
+; CHECK-NEXT: call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4i32(i32 [[SLICE_7]], <vscale x 4 x i32> [[ZN0]], <vscale x 4 x i32> [[ZN1]], <vscale x 4 x i32> [[ZN2]], <vscale x 4 x i32> [[ZN3]])
+; CHECK-NEXT: ret void
+;
call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4i32(i32 %slice,
<vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
<vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3)
@@ -186,6 +487,27 @@ define void @multi_vector_add_za_vg1x4_i32(i32 %slice, <vscale x 4 x i32> %zn0,
}
define void @multi_vector_add_za_vg1x4_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3) sanitize_memory {
+; CHECK-LABEL: @multi_vector_add_za_vg1x4_i64(
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2i64(i32 [[SLICE:%.*]], <vscale x 2 x i64> [[ZN0:%.*]], <vscale x 2 x i64> [[ZN1:%.*]], <vscale x 2 x i64> [[ZN2:%.*]], <vscale x 2 x i64> [[ZN3:%.*]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = or i32 [[TMP1]], 0
+; CHECK-NEXT: [[SLICE_7:%.*]] = add i32 [[SLICE]], 7
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK: 4:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT: unreachable
+; CHECK: 5:
+; CHECK-NEXT: call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2i64(i32 [[SLICE_7]], <vscale x 2 x i64> [[ZN0]], <vscale x 2 x i64> [[ZN1]], <vscale x 2 x i64> [[ZN2]], <vscale x 2 x i64> [[ZN3]])
+; CHECK-NEXT: ret void
+;
call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2i64(i32 %slice,
<vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,
<vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3)
@@ -197,6 +519,27 @@ define void @multi_vector_add_za_vg1x4_i64(i32 %slice, <vscale x 2 x i64> %zn0,
}
define void @multi_vector_add_za_vg1x4_f32(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3) sanitize_memory {
+; CHECK-LABEL: @multi_vector_add_za_vg1x4_f32(
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 [[SLICE:%.*]], <vscale x 4 x float> [[ZN0:%.*]], <vscale x 4 x float> [[ZN1:%.*]], <vscale x 4 x float> [[ZN2:%.*]], <vscale x 4 x float> [[ZN3:%.*]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = or i32 [[TMP1]], 0
+; CHECK-NEXT: [[SLICE_7:%.*]] = add i32 [[SLICE]], 7
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK: 4:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT: unreachable
+; CHECK: 5:
+; CHECK-NEXT: call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 [[SLICE_7]], <vscale x 4 x float> [[ZN0]], <vscale x 4 x float> [[ZN1]], <vscale x 4 x float> [[ZN2]], <vscale x 4 x float> [[ZN3]])
+; CHECK-NEXT: ret void
+;
call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 %slice,
<vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1,
<vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3)
@@ -208,6 +551,73 @@ define void @multi_vector_add_za_vg1x4_f32(i32 %slice, <vscale x 4 x float> %zn0
}
define void @multi_vector_add_za_vg1x4_f32_tuple(i64 %stride, ptr %ptr) sanitize_memory {
+; CHECK-LABEL: @multi_vector_add_za_vg1x4_f32_tuple(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr getelementptr (i8, ptr @__msan_param_tls, i64 8), align 8
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP2:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP0]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP5:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld1.pn.x4.nxv4f32(target("aarch64.svcount") [[TMP2]], ptr [[PTR:%.*]])
+; CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP5]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP5]], 1
+; CHECK-NEXT: [[TMP8:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP5]], 2
+; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP5]], 3
+; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[STRIDE:%.*]]
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[_MSPROP]], 0
+; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
+; CHECK: 10:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT: unreachable
+; CHECK: 11:
+; CHECK-NEXT: [[TMP12:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld1.pn.x4.nxv4f32(target("aarch64.svcount") [[TMP2]], ptr [[ARRAYIDX2]])
+; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP12]], 0
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP12]], 1
+; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP12]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP12]], 3
+; CHECK-NEXT: [[TMP17:%.*]] = shl i64 [[TMP1]], 1
+; CHECK-NEXT: [[TMP18:%.*]] = or i64 [[TMP17]], 0
+; CHECK-NEXT: [[MUL3:%.*]] = shl i64 [[STRIDE]], 1
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or i64 [[TMP0]], [[TMP18]]
+; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[MUL3]]
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[_MSPROP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP4]], label [[TMP19:%.*]], label [[TMP20:%.*]], !prof [[PROF1]]
+; CHECK: 19:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT: unreachable
+; CHECK: 20:
+; CHECK-NEXT: [[TMP21:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld1.pn.x4.nxv4f32(target("aarch64.svcount") [[TMP2]], ptr [[ARRAYIDX4]])
+; CHECK-NEXT: [[TMP22:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP21]], 0
+; CHECK-NEXT: [[TMP23:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP21]], 1
+; CHECK-NEXT: [[TMP24:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP21]], 2
+; CHECK-NEXT: [[TMP25:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP21]], 3
+; CHECK-NEXT: [[MSPROP_MUL_CST:%.*]] = mul i64 [[TMP1]], 1
+; CHECK-NEXT: [[MUL5:%.*]] = mul i64 [[STRIDE]], 3
+; CHECK-NEXT: [[_MSPROP2:%.*]] = or i64 [[TMP0]], [[MSPROP_MUL_CST]]
+; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[MUL5]]
+; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i64 [[_MSPROP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP5]], label [[TMP26:%.*]], label [[TMP27:%.*]], !prof [[PROF1]]
+; CHECK: 26:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT: unreachable
+; CHECK: 27:
+; CHECK-NEXT: [[TMP28:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld1.pn.x4.nxv4f32(target("aarch64.svcount") [[TMP2]], ptr [[ARRAYIDX6]])
+; CHECK-NEXT: [[TMP29:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP28]], 0
+; CHECK-NEXT: [[TMP30:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP28]], 1
+; CHECK-NEXT: [[TMP31:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP28]], 2
+; CHECK-NEXT: [[TMP32:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP28]], 3
+; CHECK-NEXT: call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 0, <vscale x 4 x float> [[TMP6]], <vscale x 4 x float> [[TMP13]], <vscale x 4 x float> [[TMP22]], <vscale x 4 x float> [[TMP29]])
+; CHECK-NEXT: call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 0, <vscale x 4 x float> [[TMP7]], <vscale x 4 x float> [[TMP14]], <vscale x 4 x float> [[TMP23]], <vscale x 4 x float> [[TMP30]])
+; CHECK-NEXT: call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 0, <vscale x 4 x float> [[TMP8]], <vscale x 4 x float> [[TMP15]], <vscale x 4 x float> [[TMP24]], <vscale x 4 x float> [[TMP31]])
+; CHECK-NEXT: call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 0, <vscale x 4 x float> [[TMP9]], <vscale x 4 x float> [[TMP16]], <vscale x 4 x float> [[TMP25]], <vscale x 4 x float> [[TMP32]])
+; CHECK-NEXT: ret void
+;
entry:
%0 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8()
%1 = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld1.pn.x4.nxv4f32(target("aarch64.svcount") %0, ptr %ptr)
@@ -243,6 +653,27 @@ entry:
}
define void @multi_vector_add_za_vg1x4_f64(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3) sanitize_memory {
+; CHECK-LABEL: @multi_vector_add_za_vg1x4_f64(
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2f64(i32 [[SLICE:%.*]], <vscale x 2 x double> [[ZN0:%.*]], <vscale x 2 x double> [[ZN1:%.*]], <vscale x 2 x double> [[ZN2:%.*]], <vscale x 2 x double> [[ZN3:%.*]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = or i32 [[TMP1]], 0
+; CHECK-NEXT: [[SLICE_7:%.*]] = add i32 [[SLICE]], 7
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK: 4:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT: unreachable
+; CHECK: 5:
+; CHECK-NEXT: call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2f64(i32 [[SLICE_7]], <vscale x 2 x double> [[ZN0]], <vscale x 2 x double> [[ZN1]], <vscale x 2 x double> [[ZN2]], <vscale x 2 x double> [[ZN3]])
+; CHECK-NEXT: ret void
+;
call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2f64(i32 %slice,
<vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1,
<vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3)
@@ -255,6 +686,12 @@ define void @multi_vector_add_za_vg1x4_f64(i32 %slice, <vscale x 2 x double> %zn
define { <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_add_single_x2_s8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm) sanitize_memory {
+; CHECK-LABEL: @multi_vec_add_single_x2_s8(
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[RES:%.*]] = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.add.single.x2.nxv16i8(<vscale x 16 x i8> [[ZDN1:%.*]], <vscale x 16 x i8> [[ZDN2:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
+; CHECK-NEXT: store { <vscale x 16 x i8>, <vscale x 16 x i8> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[RES]]
+;
%res = call { <vscale x 16 x i8>, <vscale x 16 x i8> }
@llvm.aarch64.sve.add.single.x2.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2,
<vscale x 16 x i8> %zm)
@@ -262,6 +699,12 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_add_single_x2_s8(<v
}
define { <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_add_single_x2_s16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm) sanitize_memory {
+; CHECK-LABEL: @multi_vec_add_single_x2_s16(
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[RES:%.*]] = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.add.single.x2.nxv8i16(<vscale x 8 x i16> [[ZDN1:%.*]], <vscale x 8 x i16> [[ZDN2:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
+; CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[RES]]
+;
%res = call { <vscale x 8 x i16>, <vscale x 8 x i16> }
@llvm.aarch64.sve.add.single.x2.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2,
<vscale x 8 x i16> %zm)
@@ -269,6 +712,12 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_add_single_x2_s16(<
}
define { <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_add_single_x2_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm) sanitize_memory {
+; CHECK-LABEL: @multi_vec_add_single_x2_s32(
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[RES:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.add.single.x2.nxv4i32(<vscale x 4 x i32> [[ZDN1:%.*]], <vscale x 4 x i32> [[ZDN2:%.*]], <vscale x 4 x i32> [[ZM:%.*]])
+; CHECK-NEXT: store { <vscale x 4 x i32>, <vscale x 4 x i32> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[RES]]
+;
%res = call { <vscale x 4 x i32>, <vscale x 4 x i32> }
@llvm.aarch64.sve.add.single.x2.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2,
<vscale x 4 x i32> %zm)
@@ -276,6 +725,12 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_add_single_x2_s32(<
}
define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_add_single_x2_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm) sanitize_memory {
+; CHECK-LABEL: @multi_vec_add_single_x2_s64(
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[RES:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.add.single.x2.nxv2i64(<vscale x 2 x i64> [[ZDN1:%.*]], <vscale x 2 x i64> [[ZDN2:%.*]], <vscale x 2 x i64> [[ZM:%.*]])
+; CHECK-NEXT: store { <vscale x 2 x i64>, <vscale x 2 x i64> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[RES]]
+;
%res = call { <vscale x 2 x i64>, <vscale x 2 x i64> }
@llvm.aarch64.sve.add.single.x2.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2,
<vscale x 2 x i64> %zm)
@@ -284,6 +739,12 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_add_single_x2_s64(<
define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_add_single_x4_s8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, <vscale x 16 x i8>%zm) sanitize_memory {
+; CHECK-LABEL: @multi_vec_add_single_x4_s8(
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[RES:%.*]] = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.add.single.x4.nxv16i8(<vscale x 16 x i8> [[ZDN1:%.*]], <vscale x 16 x i8> [[ZDN2:%.*]], <vscale x 16 x i8> [[ZDN3:%.*]], <vscale x 16 x i8> [[ZDN4:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
+; CHECK-NEXT: store { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[RES]]
+;
%res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
@llvm.aarch64.sve.add.single.x4.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2,
<vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
@@ -292,6 +753,12 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
}
define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_add_x4_single_s16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, <vscale x 8 x i16> %zm) sanitize_memory {
+; CHECK-LABEL: @multi_vec_add_x4_single_s16(
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[RES:%.*]] = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.add.single.x4.nxv8i16(<vscale x 8 x i16> [[ZDN1:%.*]], <vscale x 8 x i16> [[ZDN2:%.*]], <vscale x 8 x i16> [[ZDN3:%.*]], <vscale x 8 x i16> [[ZDN4:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
+; CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[RES]]
+;
%res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
@llvm.aarch64.sve.add.single.x4.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2,
<vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
@@ -300,6 +767,12 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8
}
define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_add_x4_single_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, <vscale x 4 x i32> %zm) sanitize_memory {
+; CHECK-LABEL: @multi_vec_add_x4_single_s32(
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[RES:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.add.single.x4.nxv4i32(<vscale x 4 x i32> [[ZDN1:%.*]], <vscale x 4 x i32> [[ZDN2:%.*]], <vscale x 4 x i32> [[ZDN3:%.*]], <vscale x 4 x i32> [[ZDN4:%.*]], <vscale x 4 x i32> [[ZM:%.*]])
+; CHECK-NEXT: store { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[RES]]
+;
%res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
@llvm.aarch64.sve.add.single.x4.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2,
<vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
@@ -308,6 +781,12 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4
}
define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_add_x4_single_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, <vscale x 2 x i64> %zm) sanitize_memory {
+; CHECK-LABEL: @multi_vec_add_x4_single_s64(
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[RES:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.add.single.x4.nxv2i64(<vscale x 2 x i64> [[ZDN1:%.*]], <vscale x 2 x i64> [[ZDN2:%.*]], <vscale x 2 x i64> [[ZDN3:%.*]], <vscale x 2 x i64> [[ZDN4:%.*]], <vscale x 2 x i64> [[ZM:%.*]])
+; CHECK-NEXT: store { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[RES]]
+;
%res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
@llvm.aarch64.sve.add.single.x4.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2,
<vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,