[llvm] 6b9a9cf - [msan][test] Add another target("aarch64.svcount") test case (#164343)
Author: Thurston Dang
Date: 2025-10-21T18:14:21-07:00
New Revision: 6b9a9cf040d33ad7f9cd563a907b13e373313255
URL: https://github.com/llvm/llvm-project/commit/6b9a9cf040d33ad7f9cd563a907b13e373313255
DIFF: https://github.com/llvm/llvm-project/commit/6b9a9cf040d33ad7f9cd563a907b13e373313255.diff
LOG: [msan][test] Add another target("aarch64.svcount") test case (#164343)
This shows a crash that happens because MSan tries to check the shadow
of a target("aarch64.svcount")-sized argument.
This is a follow-up to
https://github.com/llvm/llvm-project/pull/164315. It also makes a
drive-by fix to those test cases, removing FileCheck from the RUN lines
(otherwise, even if opt succeeded, the test would still fail, and thus
still XFAIL, because FileCheck cannot find any CHECK: assertions).
Forked from llvm/test/CodeGen/AArch64/sme2-intrinsics-add.ll
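For reference, the crash can be reproduced outside of lit by running opt
directly with the flags from the new tests' RUN lines (a sketch, assuming an
in-tree build with opt on PATH; the test path is the one added below):

  opt -S -passes=msan -mattr=+sme2 -mattr=+sme-i16i64 -mattr=+sme-f64f64 \
      llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add-mini.ll

Until MSan learns to handle target("aarch64.svcount") arguments, this is
expected to crash, which is why both new tests carry XFAIL: *.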
Added:
llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add-mini.ll
llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add.ll
Modified:
llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount-mini.ll
llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount.ll
Removed:
################################################################################
diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount-mini.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount-mini.ll
index 1ddcd4b56688c..1c869bd41b931 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount-mini.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount-mini.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -passes=msan -mattr=+sme -o - %s | FileCheck %s
+; RUN: opt -S -passes=msan -mattr=+sme -o - %s
; XFAIL: *
diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount.ll
index 9caa89de63748..00cf3204464d0 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -passes=msan -mattr=+sme -o - %s | FileCheck %s
+; RUN: opt -S -passes=msan -mattr=+sme -o - %s
; XFAIL: *
diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add-mini.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add-mini.ll
new file mode 100644
index 0000000000000..3f43efa233621
--- /dev/null
+++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add-mini.ll
@@ -0,0 +1,16 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -passes=msan -mattr=+sme2 -mattr=+sme-i16i64 -mattr=+sme-f64f64 -o - %s
+
+; XFAIL: *
+
+; Forked from llvm/test/CodeGen/AArch64/sme2-intrinsics-add.ll
+; Manually reduced to show MSan leads to a compiler crash
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-android9001"
+
+define void @multi_vector_add_za_vg1x4_f32_tuple(i64 %stride, ptr %ptr) sanitize_memory {
+ %1 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8()
+ %2 = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld1.pn.x4.nxv4f32(target("aarch64.svcount") %1, ptr %ptr)
+ ret void
+}
diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add.ll
new file mode 100644
index 0000000000000..cd04373c11d20
--- /dev/null
+++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add.ll
@@ -0,0 +1,340 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -passes=msan -mattr=+sme2 -mattr=+sme-i16i64 -mattr=+sme-f64f64 -o - %s
+
+; XFAIL: *
+
+; Forked from llvm/test/CodeGen/AArch64/sme2-intrinsics-add.ll
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-android9001"
+
+define void @multi_vector_add_write_single_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zm) sanitize_memory {
+ call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv4i32(i32 %slice,
+ <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
+ <vscale x 4 x i32> %zm)
+ %slice.7 = add i32 %slice, 7
+ call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv4i32(i32 %slice.7,
+ <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
+ <vscale x 4 x i32> %zm)
+ ret void
+}
+
+define void @multi_vector_add_write_single_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zm) sanitize_memory {
+ call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv2i64(i32 %slice,
+ <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,
+ <vscale x 2 x i64> %zm)
+ %slice.7 = add i32 %slice, 7
+ call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv2i64(i32 %slice.7,
+ <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,
+ <vscale x 2 x i64> %zm)
+ ret void
+}
+
+
+define void @multi_vector_add_write_single_za_vg1x4_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
+ <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3,
+ <vscale x 4 x i32> %zm) sanitize_memory {
+ call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv4i32(i32 %slice,
+ <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
+ <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3,
+ <vscale x 4 x i32> %zm)
+ %slice.7 = add i32 %slice, 7
+ call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv4i32(i32 %slice.7,
+ <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
+ <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3,
+ <vscale x 4 x i32> %zm)
+ ret void
+}
+
+define void @multi_vector_add_write_single_za_vg1x4_i64(i32 %slice,
+ <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,
+ <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3,
+ <vscale x 2 x i64> %zm) sanitize_memory {
+ call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv2i64(i32 %slice,
+ <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,
+ <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3,
+ <vscale x 2 x i64> %zm)
+ %slice.7 = add i32 %slice, 7
+ call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv2i64(i32 %slice.7,
+ <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,
+ <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3,
+ <vscale x 2 x i64> %zm)
+ ret void
+}
+
+
+define void @multi_vector_add_write_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
+ <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2) sanitize_memory {
+ call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv4i32(i32 %slice,
+ <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
+ <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2)
+ %slice.7 = add i32 %slice, 7
+ call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv4i32(i32 %slice.7,
+ <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
+ <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2)
+ ret void
+}
+
+
+define void @multi_vector_add_write_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,
+ <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2) sanitize_memory {
+ call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv2i64(i32 %slice,
+ <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,
+ <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2)
+ %slice.7 = add i32 %slice, 7
+ call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv2i64(i32 %slice.7,
+ <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,
+ <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2)
+ ret void
+}
+
+
+
+define void @multi_vector_add_write_za_vg1x4_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
+ <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3,
+ <vscale x 4 x i32> %zm0, <vscale x 4 x i32> %zm1,
+ <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3) sanitize_memory {
+ call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv4i32(i32 %slice,
+ <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
+ <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3,
+ <vscale x 4 x i32> %zm0, <vscale x 4 x i32> %zm1,
+ <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3)
+ %slice.7 = add i32 %slice, 7
+ call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv4i32(i32 %slice.7,
+ <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
+ <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3,
+ <vscale x 4 x i32> %zm0, <vscale x 4 x i32> %zm1,
+ <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3)
+ ret void
+}
+
+define void @multi_vector_add_write_za_vg1x4_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,
+ <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3,
+ <vscale x 2 x i64> %zm0, <vscale x 2 x i64> %zm1,
+ <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3) sanitize_memory {
+ call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv2i64(i32 %slice,
+ <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,
+ <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3,
+ <vscale x 2 x i64> %zm0, <vscale x 2 x i64> %zm1,
+ <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3)
+ %slice.7 = add i32 %slice, 7
+ call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv2i64(i32 %slice.7,
+ <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,
+ <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3,
+ <vscale x 2 x i64> %zm0, <vscale x 2 x i64> %zm1,
+ <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3)
+ ret void
+}
+
+define void @multi_vector_add_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1) sanitize_memory {
+ call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4i32(i32 %slice,<vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1)
+ %slice.7 = add i32 %slice, 7
+ call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4i32(i32 %slice.7, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1)
+ ret void
+}
+
+define void @multi_vector_add_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1) sanitize_memory {
+ call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1)
+ %slice.7 = add i32 %slice, 7
+ call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2i64(i32 %slice.7, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1)
+ ret void
+}
+
+define void @multi_vector_add_za_vg1x2_f32(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1) sanitize_memory {
+ call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4f32(i32 %slice,
+ <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1)
+ %slice.7 = add i32 %slice, 7
+ call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4f32(i32 %slice.7,
+ <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1)
+ ret void
+}
+
+define void @multi_vector_add_za_vg1x2_f64(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1) sanitize_memory {
+ call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2f64(i32 %slice,
+ <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1)
+ %slice.7 = add i32 %slice, 7
+ call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2f64(i32 %slice.7,
+ <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1)
+ ret void
+}
+
+define void @multi_vector_add_za_vg1x2_f64_tuple(i64 %stride, ptr %ptr) sanitize_memory {
+entry:
+ %0 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8()
+ %1 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld1.pn.x2.nxv2f64(target("aarch64.svcount") %0, ptr %ptr)
+ %2 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %1, 0
+ %3 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %1, 1
+ %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride
+ %4 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld1.pn.x2.nxv2f64(target("aarch64.svcount") %0, ptr %arrayidx2)
+ %5 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %4, 0
+ %6 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %4, 1
+ call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2f64(i32 0, <vscale x 2 x double> %2, <vscale x 2 x double> %5)
+ call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2f64(i32 0, <vscale x 2 x double> %3, <vscale x 2 x double> %6)
+ ret void
+}
+
+
+define void @multi_vector_add_za_vg1x4_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3) sanitize_memory {
+ call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4i32(i32 %slice,
+ <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
+ <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3)
+ %slice.7 = add i32 %slice, 7
+ call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4i32(i32 %slice.7,
+ <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
+ <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3)
+ ret void
+}
+
+define void @multi_vector_add_za_vg1x4_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3) sanitize_memory {
+ call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2i64(i32 %slice,
+ <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,
+ <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3)
+ %slice.7 = add i32 %slice, 7
+ call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2i64(i32 %slice.7,
+ <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,
+ <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3)
+ ret void
+}
+
+define void @multi_vector_add_za_vg1x4_f32(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3) sanitize_memory {
+ call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 %slice,
+ <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1,
+ <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3)
+ %slice.7 = add i32 %slice, 7
+ call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 %slice.7,
+ <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1,
+ <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3)
+ ret void
+}
+
+define void @multi_vector_add_za_vg1x4_f32_tuple(i64 %stride, ptr %ptr) sanitize_memory {
+entry:
+ %0 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8()
+ %1 = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld1.pn.x4.nxv4f32(target("aarch64.svcount") %0, ptr %ptr)
+ %2 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %1, 0
+ %3 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %1, 1
+ %4 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %1, 2
+ %5 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %1, 3
+ %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride
+ %6 = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld1.pn.x4.nxv4f32(target("aarch64.svcount") %0, ptr %arrayidx2)
+ %7 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %6, 0
+ %8 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %6, 1
+ %9 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %6, 2
+ %10 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %6, 3
+ %mul3 = shl i64 %stride, 1
+ %arrayidx4 = getelementptr inbounds i8, ptr %ptr, i64 %mul3
+ %11 = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld1.pn.x4.nxv4f32(target("aarch64.svcount") %0, ptr %arrayidx4)
+ %12 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %11, 0
+ %13 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %11, 1
+ %14 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %11, 2
+ %15 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %11, 3
+ %mul5 = mul i64 %stride, 3
+ %arrayidx6 = getelementptr inbounds i8, ptr %ptr, i64 %mul5
+ %16 = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld1.pn.x4.nxv4f32(target("aarch64.svcount") %0, ptr %arrayidx6)
+ %17 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %16, 0
+ %18 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %16, 1
+ %19 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %16, 2
+ %20 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %16, 3
+ call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 0, <vscale x 4 x float> %2, <vscale x 4 x float> %7, <vscale x 4 x float> %12, <vscale x 4 x float> %17)
+ call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 0, <vscale x 4 x float> %3, <vscale x 4 x float> %8, <vscale x 4 x float> %13, <vscale x 4 x float> %18)
+ call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 0, <vscale x 4 x float> %4, <vscale x 4 x float> %9, <vscale x 4 x float> %14, <vscale x 4 x float> %19)
+ call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 0, <vscale x 4 x float> %5, <vscale x 4 x float> %10, <vscale x 4 x float> %15, <vscale x 4 x float> %20)
+ ret void
+}
+
+define void @multi_vector_add_za_vg1x4_f64(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3) sanitize_memory {
+ call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2f64(i32 %slice,
+ <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1,
+ <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3)
+ %slice.7 = add i32 %slice, 7
+ call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2f64(i32 %slice.7,
+ <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1,
+ <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3)
+ ret void
+}
+
+
+define { <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_add_single_x2_s8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm) sanitize_memory {
+ %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> }
+ @llvm.aarch64.sve.add.single.x2.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2,
+ <vscale x 16 x i8> %zm)
+ ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %res
+}
+
+define { <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_add_single_x2_s16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm) sanitize_memory {
+ %res = call { <vscale x 8 x i16>, <vscale x 8 x i16> }
+ @llvm.aarch64.sve.add.single.x2.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2,
+ <vscale x 8 x i16> %zm)
+ ret { <vscale x 8 x i16>, <vscale x 8 x i16> } %res
+}
+
+define { <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_add_single_x2_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm) sanitize_memory {
+ %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> }
+ @llvm.aarch64.sve.add.single.x2.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2,
+ <vscale x 4 x i32> %zm)
+ ret { <vscale x 4 x i32>, <vscale x 4 x i32> } %res
+}
+
+define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_add_single_x2_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm) sanitize_memory {
+ %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> }
+ @llvm.aarch64.sve.add.single.x2.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2,
+ <vscale x 2 x i64> %zm)
+ ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %res
+}
+
+
+define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_add_single_x4_s8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, <vscale x 16 x i8>%zm) sanitize_memory {
+ %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
+ @llvm.aarch64.sve.add.single.x4.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2,
+ <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
+ <vscale x 16 x i8> %zm)
+ ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res
+}
+
+define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_add_x4_single_s16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, <vscale x 8 x i16> %zm) sanitize_memory {
+ %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
+ @llvm.aarch64.sve.add.single.x4.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2,
+ <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
+ <vscale x 8 x i16> %zm)
+ ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res
+}
+
+define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_add_x4_single_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, <vscale x 4 x i32> %zm) sanitize_memory {
+ %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
+ @llvm.aarch64.sve.add.single.x4.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2,
+ <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
+ <vscale x 4 x i32> %zm)
+ ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res
+}
+
+define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_add_x4_single_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, <vscale x 2 x i64> %zm) sanitize_memory {
+ %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
+ @llvm.aarch64.sve.add.single.x4.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2,
+ <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,
+ <vscale x 2 x i64> %zm)
+ ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res
+}
+declare void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv4i32(i32, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv2i64(i32, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+declare void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv4i32(i32, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv2i64(i32, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+declare void @llvm.aarch64.sme.add.write.za.vg1x2.nxv4i32(i32, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare void @llvm.aarch64.sme.add.write.za.vg1x2.nxv2i64(i32, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+declare void @llvm.aarch64.sme.add.write.za.vg1x4.nxv4i32(i32, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare void @llvm.aarch64.sme.add.write.za.vg1x4.nxv2i64(i32, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+declare void @llvm.aarch64.sme.add.za32.vg1x2.nxv4i32(i32, <vscale x 4 x i32>,<vscale x 4 x i32>)
+declare void @llvm.aarch64.sme.add.za64.vg1x2.nxv2i64(i32, <vscale x 2 x i64>,<vscale x 2 x i64>)
+declare void @llvm.aarch64.sme.add.za32.vg1x4.nxv4i32(i32, <vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>)
+declare void @llvm.aarch64.sme.add.za64.vg1x4.nxv2i64(i32, <vscale x 2 x i64>,<vscale x 2 x i64>,<vscale x 2 x i64>, <vscale x 2 x i64>)
+declare void @llvm.aarch64.sme.add.za32.vg1x2.nxv4f32(i32, <vscale x 4 x float>, <vscale x 4 x float>)
+declare void @llvm.aarch64.sme.add.za64.vg1x2.nxv2f64(i32, <vscale x 2 x double>, <vscale x 2 x double>)
+declare void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32, <vscale x 4 x float>, <vscale x 4 x float>,<vscale x 4 x float>, <vscale x 4 x float>)
+declare void @llvm.aarch64.sme.add.za64.vg1x4.nxv2f64(i32, <vscale x 2 x double>, <vscale x 2 x double>,<vscale x 2 x double>, <vscale x 2 x double>)
+declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.add.single.x2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.add.single.x2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.add.single.x2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.add.single.x2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.add.single.x4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.add.single.x4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.add.single.x4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.add.single.x4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
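Once the crash is fixed, the XFAIL markers can be dropped and the RUN lines
piped into FileCheck again; the CHECK lines referenced by the autogeneration
NOTE would then be regenerated with update_test_checks.py (a sketch; the
build/bin/opt path is an assumption about the local build layout):

  llvm/utils/update_test_checks.py --opt-binary=build/bin/opt \
      llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add.ll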