[llvm] 12ba751 - [NFC][LLVM][SME] Separate streaming-mode-only intrinsic tests.
Paul Walker via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 24 07:20:42 PDT 2025
Author: Paul Walker
Date: 2025-06-24T14:18:49Z
New Revision: 12ba75145efe3ada44374036d7d5b5e94e855283
URL: https://github.com/llvm/llvm-project/commit/12ba75145efe3ada44374036d7d5b5e94e855283
DIFF: https://github.com/llvm/llvm-project/commit/12ba75145efe3ada44374036d7d5b5e94e855283.diff
LOG: [NFC][LLVM][SME] Separate streaming-mode-only intrinsic tests.
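For orientation (editorial note, not part of the original commit message): the multi-vector clamp and x2 qcvtn tests that require streaming mode now live in the new sme2-intrinsics-* files, which are run with streaming mode forced, while the sve2p1-intrinsics-* files keep only the non-streaming forms. A minimal sketch of the resulting RUN-line split, with both RUN lines copied verbatim from the diff below:

; sme2-intrinsics-*.ll (streaming-only forms)
; RUN: llc -mattr=+sme2 -force-streaming -verify-machineinstrs < %s | FileCheck %s

; sve2p1-intrinsics-*.ll (non-streaming forms)
; RUN: llc -mattr=+sve2p1 -verify-machineinstrs < %s | FileCheck %s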
Added:
llvm/test/CodeGen/AArch64/sme2-intrinsics-fclamp.ll
llvm/test/CodeGen/AArch64/sme2-intrinsics-sclamp.ll
llvm/test/CodeGen/AArch64/sme2-intrinsics-uclamp.ll
llvm/test/CodeGen/AArch64/sve2-intrinsics-rax1.ll
llvm/test/CodeGen/AArch64/sve2-intrinsics-sm4.ll
llvm/test/CodeGen/AArch64/sve2p1-intrinsics-qcvtn.ll
Modified:
llvm/test/CodeGen/AArch64/sme2-intrinsics-qcvtn.ll
llvm/test/CodeGen/AArch64/sve2-intrinsics-crypto.ll
llvm/test/CodeGen/AArch64/sve2p1-intrinsics-fclamp.ll
llvm/test/CodeGen/AArch64/sve2p1-intrinsics-sclamp.ll
llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uclamp.ll
llvm/test/CodeGen/AArch64/sve2p2-intrinsics.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-fclamp.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-fclamp.ll
new file mode 100644
index 0000000000000..b7ada9f1d3faa
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-fclamp.ll
@@ -0,0 +1,85 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mattr=+sme2 -force-streaming -verify-machineinstrs < %s | FileCheck %s
+
+target triple = "aarch64-linux-gnu"
+
+define { <vscale x 8 x half>, <vscale x 8 x half> } @test_fclamp_single_x2_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c, <vscale x 8 x half> %d) {
+; CHECK-LABEL: test_fclamp_single_x2_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: fclamp { z0.h, z1.h }, z2.h, z3.h
+; CHECK-NEXT: ret
+ %res = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fclamp.single.x2.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c, <vscale x 8 x half> %d)
+ ret { <vscale x 8 x half>, <vscale x 8 x half> } %res
+}
+
+define { <vscale x 4 x float>, <vscale x 4 x float> } @test_fclamp_single_x2_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x float> %d) {
+; CHECK-LABEL: test_fclamp_single_x2_f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: fclamp { z0.s, z1.s }, z2.s, z3.s
+; CHECK-NEXT: ret
+ %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fclamp.single.x2.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x float> %d)
+ ret { <vscale x 4 x float>, <vscale x 4 x float> } %res
+}
+
+define { <vscale x 2 x double>, <vscale x 2 x double> } @test_fclamp_single_x2_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c, <vscale x 2 x double> %d) {
+; CHECK-LABEL: test_fclamp_single_x2_f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: fclamp { z0.d, z1.d }, z2.d, z3.d
+; CHECK-NEXT: ret
+ %res = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fclamp.single.x2.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c, <vscale x 2 x double> %d)
+ ret { <vscale x 2 x double>, <vscale x 2 x double> } %res
+}
+
+
+define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @test_fclamp_single_x4_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c, <vscale x 8 x half> %d, <vscale x 8 x half> %e, <vscale x 8 x half> %f) {
+; CHECK-LABEL: test_fclamp_single_x4_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: fclamp { z0.h - z3.h }, z4.h, z5.h
+; CHECK-NEXT: ret
+ %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fclamp.single.x4.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c, <vscale x 8 x half> %d, <vscale x 8 x half> %e, <vscale x 8 x half> %f)
+ ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } %res
+}
+
+define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @test_fclamp_single_x4_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x float> %d, <vscale x 4 x float> %e, <vscale x 4 x float> %f) {
+; CHECK-LABEL: test_fclamp_single_x4_f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: fclamp { z0.s - z3.s }, z4.s, z5.s
+; CHECK-NEXT: ret
+ %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fclamp.single.x4.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x float> %d, <vscale x 4 x float> %e, <vscale x 4 x float> %f)
+ ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res
+}
+
+define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @test_fclamp_single_x4_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c, <vscale x 2 x double> %d, <vscale x 2 x double> %e, <vscale x 2 x double> %f) {
+; CHECK-LABEL: test_fclamp_single_x4_f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: fclamp { z0.d - z3.d }, z4.d, z5.d
+; CHECK-NEXT: ret
+ %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fclamp.single.x4.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c, <vscale x 2 x double> %d, <vscale x 2 x double> %e, <vscale x 2 x double> %f)
+ ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %res
+}
+
+declare { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fclamp.single.x2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fclamp.single.x2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fclamp.single.x2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fclamp.single.x4.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c, <vscale x 8 x half> %d, <vscale x 8 x half> %e, <vscale x 8 x half> %f)
+declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fclamp.single.x4.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x float> %d, <vscale x 4 x float> %e, <vscale x 4 x float> %f)
+declare { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fclamp.single.x4.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c, <vscale x 2 x double> %d, <vscale x 2 x double> %e, <vscale x 2 x double> %f)
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-qcvtn.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-qcvtn.ll
index 25f14d0a6c425..704b63edfd690 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-qcvtn.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-qcvtn.ll
@@ -1,22 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py$
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2,+bf16 -force-streaming -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -force-streaming -verify-machineinstrs < %s | FileCheck %s
;
; SQCVTN
;
-; x2
-define <vscale x 8 x i16 > @multi_vector_qcvtn_x2_s16_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2) {
-; CHECK-LABEL: multi_vector_qcvtn_x2_s16_s32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov z3.d, z2.d
-; CHECK-NEXT: mov z2.d, z1.d
-; CHECK-NEXT: sqcvtn z0.h, { z2.s, z3.s }
-; CHECK-NEXT: ret
- %res = call <vscale x 8 x i16> @llvm.aarch64.sve.sqcvtn.x2.nxv4i32(<vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2)
- ret <vscale x 8 x i16> %res
-}
-
; x4
define <vscale x 16 x i8 > @multi_vector_qcvtn_x4_s8_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4) {
; CHECK-LABEL: multi_vector_qcvtn_x4_s8_s32:
@@ -48,18 +36,6 @@ define <vscale x 8 x i16> @multi_vector_qcvtn_x4_s16_s64(<vscale x 2 x i64> %unu
; UQCVTN
;
-; x2
-define <vscale x 8 x i16> @multi_vector_qcvtn_x2_u16_u32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1) {
-; CHECK-LABEL: multi_vector_qcvtn_x2_u16_u32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov z3.d, z2.d
-; CHECK-NEXT: mov z2.d, z1.d
-; CHECK-NEXT: uqcvtn z0.h, { z2.s, z3.s }
-; CHECK-NEXT: ret
- %res = call <vscale x 8 x i16> @llvm.aarch64.sve.uqcvtn.x2.nxv4i32(<vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1)
- ret<vscale x 8 x i16> %res
-}
-
; x4
define <vscale x 16 x i8> @multi_vector_qcvtn_x4_u8_u32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4) {
; CHECK-LABEL: multi_vector_qcvtn_x4_u8_u32:
@@ -91,17 +67,6 @@ define <vscale x 8 x i16> @multi_vector_qcvtn_x4_u16_u64(<vscale x 2 x i64> %unu
; SQCVTUN
;
-; x2
-define <vscale x 8 x i16 > @multi_vector_qcvtn_x2_s16_u32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2) {
-; CHECK-LABEL: multi_vector_qcvtn_x2_s16_u32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov z3.d, z2.d
-; CHECK-NEXT: mov z2.d, z1.d
-; CHECK-NEXT: sqcvtun z0.h, { z2.s, z3.s }
-; CHECK-NEXT: ret
- %res = call <vscale x 8 x i16> @llvm.aarch64.sve.sqcvtun.x2.nxv4i322(<vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2)
- ret <vscale x 8 x i16> %res
-}
; x4
define <vscale x 16 x i8> @multi_vector_qcvtn_x4_u8_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4) {
; CHECK-LABEL: multi_vector_qcvtn_x4_u8_s32:
@@ -129,9 +94,6 @@ define <vscale x 8 x i16> @multi_vector_qcvtn_x4_u16_s64(<vscale x 2 x i64> %unu
ret <vscale x 8 x i16> %res
}
-declare <vscale x 8 x i16> @llvm.aarch64.sve.uqcvtn.x2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 8 x i16> @llvm.aarch64.sve.sqcvtn.x2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 8 x i16> @llvm.aarch64.sve.sqcvtun.x2.nxv4i322(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.sqcvtn.x4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqcvtn.x4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.uqcvtn.x4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-sclamp.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-sclamp.ll
new file mode 100644
index 0000000000000..b9b8469d692bd
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-sclamp.ll
@@ -0,0 +1,110 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mattr=+sme2 -force-streaming -verify-machineinstrs < %s | FileCheck %s
+
+target triple = "aarch64-linux-gnu"
+
+define { <vscale x 16 x i8>, <vscale x 16 x i8> } @test_sclamp_single_x2_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d) {
+; CHECK-LABEL: test_sclamp_single_x2_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: sclamp { z0.b, z1.b }, z2.b, z3.b
+; CHECK-NEXT: ret
+ %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.sclamp.single.x2.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d)
+ ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %res
+}
+
+define { <vscale x 8 x i16>, <vscale x 8 x i16> } @test_sclamp_single_x2_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d) {
+; CHECK-LABEL: test_sclamp_single_x2_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: sclamp { z0.h, z1.h }, z2.h, z3.h
+; CHECK-NEXT: ret
+ %res = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.sclamp.single.x2.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d)
+ ret { <vscale x 8 x i16>, <vscale x 8 x i16> } %res
+}
+
+define { <vscale x 4 x i32>, <vscale x 4 x i32> } @test_sclamp_single_x2_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d) {
+; CHECK-LABEL: test_sclamp_single_x2_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: sclamp { z0.s, z1.s }, z2.s, z3.s
+; CHECK-NEXT: ret
+ %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.sclamp.single.x2.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d)
+ ret { <vscale x 4 x i32>, <vscale x 4 x i32> } %res
+}
+
+define { <vscale x 2 x i64>, <vscale x 2 x i64> } @test_sclamp_single_x2_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d) {
+; CHECK-LABEL: test_sclamp_single_x2_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: sclamp { z0.d, z1.d }, z2.d, z3.d
+; CHECK-NEXT: ret
+ %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.sclamp.single.x2.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d)
+ ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %res
+}
+
+define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @test_sclamp_single_x4_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d, <vscale x 16 x i8> %e, <vscale x 16 x i8> %f) {
+; CHECK-LABEL: test_sclamp_single_x4_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: sclamp { z0.b - z3.b }, z4.b, z5.b
+; CHECK-NEXT: ret
+ %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.sclamp.single.x4.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d, <vscale x 16 x i8> %e, <vscale x 16 x i8> %f)
+ ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res
+}
+
+define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @test_sclamp_single_x4_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d, <vscale x 8 x i16> %e, <vscale x 8 x i16> %f) {
+; CHECK-LABEL: test_sclamp_single_x4_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: sclamp { z0.h - z3.h }, z4.h, z5.h
+; CHECK-NEXT: ret
+ %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.sclamp.single.x4.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d, <vscale x 8 x i16> %e, <vscale x 8 x i16> %f)
+ ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res
+}
+
+define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @test_sclamp_single_x4_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d, <vscale x 4 x i32> %e, <vscale x 4 x i32> %f) {
+; CHECK-LABEL: test_sclamp_single_x4_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: sclamp { z0.s - z3.s }, z4.s, z5.s
+; CHECK-NEXT: ret
+ %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.sclamp.single.x4.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d, <vscale x 4 x i32> %e, <vscale x 4 x i32> %f)
+ ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res
+}
+
+define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @test_sclamp_single_x4_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d, <vscale x 2 x i64> %e, <vscale x 2 x i64> %f) {
+; CHECK-LABEL: test_sclamp_single_x4_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: sclamp { z0.d - z3.d }, z4.d, z5.d
+; CHECK-NEXT: ret
+ %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.sclamp.single.x4.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d, <vscale x 2 x i64> %e, <vscale x 2 x i64> %f)
+ ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res
+}
+
+declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.sclamp.single.x2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.sclamp.single.x2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.sclamp.single.x2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.sclamp.single.x2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.sclamp.single.x4.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d, <vscale x 16 x i8> %e, <vscale x 16 x i8> %f)
+declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.sclamp.single.x4.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d, <vscale x 8 x i16> %e, <vscale x 8 x i16> %f)
+declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.sclamp.single.x4.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d, <vscale x 4 x i32> %e, <vscale x 4 x i32> %f)
+declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.sclamp.single.x4.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d, <vscale x 2 x i64> %e, <vscale x 2 x i64> %f)
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-uclamp.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-uclamp.ll
new file mode 100644
index 0000000000000..be0391b67e476
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-uclamp.ll
@@ -0,0 +1,110 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mattr=+sme2 -force-streaming -verify-machineinstrs < %s | FileCheck %s
+
+target triple = "aarch64-linux-gnu"
+
+define { <vscale x 16 x i8>, <vscale x 16 x i8> } @test_uclamp_single_x2_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d) {
+; CHECK-LABEL: test_uclamp_single_x2_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: uclamp { z0.b, z1.b }, z2.b, z3.b
+; CHECK-NEXT: ret
+ %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.uclamp.single.x2.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d)
+ ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %res
+}
+
+define { <vscale x 8 x i16>, <vscale x 8 x i16> } @test_uclamp_single_x2_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d) {
+; CHECK-LABEL: test_uclamp_single_x2_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: uclamp { z0.h, z1.h }, z2.h, z3.h
+; CHECK-NEXT: ret
+ %res = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.uclamp.single.x2.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d)
+ ret { <vscale x 8 x i16>, <vscale x 8 x i16> } %res
+}
+
+define { <vscale x 4 x i32>, <vscale x 4 x i32> } @test_uclamp_single_x2_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d) {
+; CHECK-LABEL: test_uclamp_single_x2_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: uclamp { z0.s, z1.s }, z2.s, z3.s
+; CHECK-NEXT: ret
+ %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.uclamp.single.x2.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d)
+ ret { <vscale x 4 x i32>, <vscale x 4 x i32> } %res
+}
+
+define { <vscale x 2 x i64>, <vscale x 2 x i64> } @test_uclamp_single_x2_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d) {
+; CHECK-LABEL: test_uclamp_single_x2_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: uclamp { z0.d, z1.d }, z2.d, z3.d
+; CHECK-NEXT: ret
+ %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.uclamp.single.x2.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d)
+ ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %res
+}
+
+define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @test_uclamp_single_x4_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d, <vscale x 16 x i8> %e, <vscale x 16 x i8> %f) {
+; CHECK-LABEL: test_uclamp_single_x4_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: uclamp { z0.b - z3.b }, z4.b, z5.b
+; CHECK-NEXT: ret
+ %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.uclamp.single.x4.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d, <vscale x 16 x i8> %e, <vscale x 16 x i8> %f)
+ ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res
+}
+
+define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @test_uclamp_single_x4_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d, <vscale x 8 x i16> %e, <vscale x 8 x i16> %f) {
+; CHECK-LABEL: test_uclamp_single_x4_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: uclamp { z0.h - z3.h }, z4.h, z5.h
+; CHECK-NEXT: ret
+ %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.uclamp.single.x4.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d, <vscale x 8 x i16> %e, <vscale x 8 x i16> %f)
+ ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res
+}
+
+define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @test_uclamp_single_x4_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d, <vscale x 4 x i32> %e, <vscale x 4 x i32> %f) {
+; CHECK-LABEL: test_uclamp_single_x4_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: uclamp { z0.s - z3.s }, z4.s, z5.s
+; CHECK-NEXT: ret
+ %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.uclamp.single.x4.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d, <vscale x 4 x i32> %e, <vscale x 4 x i32> %f)
+ ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res
+}
+
+define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @test_uclamp_single_x4_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d, <vscale x 2 x i64> %e, <vscale x 2 x i64> %f) {
+; CHECK-LABEL: test_uclamp_single_x4_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT: uclamp { z0.d - z3.d }, z4.d, z5.d
+; CHECK-NEXT: ret
+ %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.uclamp.single.x4.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d, <vscale x 2 x i64> %e, <vscale x 2 x i64> %f)
+ ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res
+}
+
+declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.uclamp.single.x2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.uclamp.single.x2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.uclamp.single.x2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.uclamp.single.x2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.uclamp.single.x4.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d, <vscale x 16 x i8> %e, <vscale x 16 x i8> %f)
+declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.uclamp.single.x4.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d, <vscale x 8 x i16> %e, <vscale x 8 x i16> %f)
+declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.uclamp.single.x4.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d, <vscale x 4 x i32> %e, <vscale x 4 x i32> %f)
+declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.uclamp.single.x4.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d, <vscale x 2 x i64> %e, <vscale x 2 x i64> %f)
diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-crypto.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-crypto.ll
index f477fcbe1eb5b..fc08f2cdf94a9 100644
--- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-crypto.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-crypto.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2-aes,+sve2-sha3,+sve2-sm4 < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2,+sve-aes,+sve2-sha3,+sve2-sm4 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2-aes < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2,+sve-aes < %s | FileCheck %s
;
; AESD
@@ -78,53 +78,7 @@ define <vscale x 16 x i8> @aesmc_i8(<vscale x 16 x i8> %a) {
ret <vscale x 16 x i8> %out
}
-;
-; RAX1
-;
-
-define <vscale x 2 x i64> @rax1_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
-; CHECK-LABEL: rax1_i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: rax1 z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
- %out = call <vscale x 2 x i64> @llvm.aarch64.sve.rax1(<vscale x 2 x i64> %a,
- <vscale x 2 x i64> %b)
- ret <vscale x 2 x i64> %out
-}
-
-;
-; SM4E
-;
-
-define <vscale x 4 x i32> @sm4e_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
-; CHECK-LABEL: sm4e_i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sm4e z0.s, z0.s, z1.s
-; CHECK-NEXT: ret
- %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sm4e(<vscale x 4 x i32> %a,
- <vscale x 4 x i32> %b)
- ret <vscale x 4 x i32> %out
-}
-
-;
-; SM4EKEY
-;
-
-define <vscale x 4 x i32> @sm4ekey_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
-; CHECK-LABEL: sm4ekey_i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sm4ekey z0.s, z0.s, z1.s
-; CHECK-NEXT: ret
- %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sm4ekey(<vscale x 4 x i32> %a,
- <vscale x 4 x i32> %b)
- ret <vscale x 4 x i32> %out
-}
-
-
declare <vscale x 16 x i8> @llvm.aarch64.sve.aesd(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.aesimc(<vscale x 16 x i8>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.aese(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.aesmc(<vscale x 16 x i8>)
-declare <vscale x 2 x i64> @llvm.aarch64.sve.rax1(<vscale x 2 x i64>, <vscale x 2 x i64>)
-declare <vscale x 4 x i32> @llvm.aarch64.sve.sm4e(<vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 4 x i32> @llvm.aarch64.sve.sm4ekey(<vscale x 4 x i32>, <vscale x 4 x i32>)
diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-rax1.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-rax1.ll
new file mode 100644
index 0000000000000..9590c3432ad2c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-rax1.ll
@@ -0,0 +1,18 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2-sha3 < %s | FileCheck %s
+
+;
+; RAX1
+;
+
+define <vscale x 2 x i64> @rax1_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: rax1_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rax1 z0.d, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.rax1(<vscale x 2 x i64> %a,
+ <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %out
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.rax1(<vscale x 2 x i64>, <vscale x 2 x i64>)
diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-sm4.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-sm4.ll
new file mode 100644
index 0000000000000..de5af6ce0ae99
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-sm4.ll
@@ -0,0 +1,33 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2-sm4 < %s | FileCheck %s
+
+;
+; SM4E
+;
+
+define <vscale x 4 x i32> @sm4e_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: sm4e_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sm4e z0.s, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sm4e(<vscale x 4 x i32> %a,
+ <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %out
+}
+
+;
+; SM4EKEY
+;
+
+define <vscale x 4 x i32> @sm4ekey_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: sm4ekey_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sm4ekey z0.s, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sm4ekey(<vscale x 4 x i32> %a,
+ <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %out
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sm4e(<vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sm4ekey(<vscale x 4 x i32>, <vscale x 4 x i32>)
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-fclamp.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-fclamp.ll
index 0a5a0b7cfcc88..726a8b4cfc666 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-fclamp.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-fclamp.ll
@@ -1,9 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mattr=+sve2p1 -verify-machineinstrs < %s | FileCheck %s
target triple = "aarch64-linux-gnu"
-define <vscale x 8 x half> @test_fclamp_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) #0 {
+define <vscale x 8 x half> @test_fclamp_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
; CHECK-LABEL: test_fclamp_f16:
; CHECK: // %bb.0:
; CHECK-NEXT: fclamp z0.h, z1.h, z2.h
@@ -12,7 +12,7 @@ define <vscale x 8 x half> @test_fclamp_f16(<vscale x 8 x half> %a, <vscale x 8
ret <vscale x 8 x half> %res
}
-define <vscale x 4 x float> @test_fclamp_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) #0 {
+define <vscale x 4 x float> @test_fclamp_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) {
; CHECK-LABEL: test_fclamp_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: fclamp z0.s, z1.s, z2.s
@@ -21,7 +21,7 @@ define <vscale x 4 x float> @test_fclamp_f32(<vscale x 4 x float> %a, <vscale x
ret <vscale x 4 x float> %res
}
-define <vscale x 2 x double> @test_fclamp_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) #0 {
+define <vscale x 2 x double> @test_fclamp_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
; CHECK-LABEL: test_fclamp_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: fclamp z0.d, z1.d, z2.d
@@ -30,91 +30,6 @@ define <vscale x 2 x double> @test_fclamp_f64(<vscale x 2 x double> %a, <vscale
ret <vscale x 2 x double> %res
}
-define { <vscale x 8 x half>, <vscale x 8 x half> } @test_fclamp_single_x2_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c, <vscale x 8 x half> %d) #1 {
-; CHECK-LABEL: test_fclamp_single_x2_f16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
-; CHECK-NEXT: fclamp { z0.h, z1.h }, z2.h, z3.h
-; CHECK-NEXT: ret
- %res = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fclamp.single.x2.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c, <vscale x 8 x half> %d)
- ret { <vscale x 8 x half>, <vscale x 8 x half> } %res
-}
-
-define { <vscale x 4 x float>, <vscale x 4 x float> } @test_fclamp_single_x2_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x float> %d) #1 {
-; CHECK-LABEL: test_fclamp_single_x2_f32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
-; CHECK-NEXT: fclamp { z0.s, z1.s }, z2.s, z3.s
-; CHECK-NEXT: ret
- %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fclamp.single.x2.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x float> %d)
- ret { <vscale x 4 x float>, <vscale x 4 x float> } %res
-}
-
-define { <vscale x 2 x double>, <vscale x 2 x double> } @test_fclamp_single_x2_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c, <vscale x 2 x double> %d) #1 {
-; CHECK-LABEL: test_fclamp_single_x2_f64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
-; CHECK-NEXT: fclamp { z0.d, z1.d }, z2.d, z3.d
-; CHECK-NEXT: ret
- %res = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fclamp.single.x2.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c, <vscale x 2 x double> %d)
- ret { <vscale x 2 x double>, <vscale x 2 x double> } %res
-}
-
-
-define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @test_fclamp_single_x4_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c, <vscale x 8 x half> %d, <vscale x 8 x half> %e, <vscale x 8 x half> %f) #1 {
-; CHECK-LABEL: test_fclamp_single_x4_f16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: fclamp { z0.h - z3.h }, z4.h, z5.h
-; CHECK-NEXT: ret
- %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fclamp.single.x4.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c, <vscale x 8 x half> %d, <vscale x 8 x half> %e, <vscale x 8 x half> %f)
- ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } %res
-}
-
-define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @test_fclamp_single_x4_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x float> %d, <vscale x 4 x float> %e, <vscale x 4 x float> %f) #1 {
-; CHECK-LABEL: test_fclamp_single_x4_f32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: fclamp { z0.s - z3.s }, z4.s, z5.s
-; CHECK-NEXT: ret
- %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fclamp.single.x4.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x float> %d, <vscale x 4 x float> %e, <vscale x 4 x float> %f)
- ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res
-}
-
-define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @test_fclamp_single_x4_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c, <vscale x 2 x double> %d, <vscale x 2 x double> %e, <vscale x 2 x double> %f) #1 {
-; CHECK-LABEL: test_fclamp_single_x4_f64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: fclamp { z0.d - z3.d }, z4.d, z5.d
-; CHECK-NEXT: ret
- %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fclamp.single.x4.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c, <vscale x 2 x double> %d, <vscale x 2 x double> %e, <vscale x 2 x double> %f)
- ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %res
-}
-
-
-attributes #0 = { "target-features"="+sve2p1" }
-attributes #1 = { "target-features"="+sme2" "aarch64_pstate_sm_enabled" }
-
declare <vscale x 8 x half> @llvm.aarch64.sve.fclamp.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 4 x float> @llvm.aarch64.sve.fclamp.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.fclamp.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
-
-declare { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fclamp.single.x2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
-declare { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fclamp.single.x2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
-declare { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fclamp.single.x2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
-
-declare { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fclamp.single.x4.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c, <vscale x 8 x half> %d, <vscale x 8 x half> %e, <vscale x 8 x half> %f)
-declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fclamp.single.x4.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x float> %d, <vscale x 4 x float> %e, <vscale x 4 x float> %f)
-declare { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fclamp.single.x4.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c, <vscale x 2 x double> %d, <vscale x 2 x double> %e, <vscale x 2 x double> %f)
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-qcvtn.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-qcvtn.ll
new file mode 100644
index 0000000000000..8b40ac4c3c947
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-qcvtn.ll
@@ -0,0 +1,54 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py$
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 < %s | FileCheck %s
+
+;
+; SQCVTN
+;
+
+; x2
+define <vscale x 8 x i16 > @multi_vector_qcvtn_x2_s16_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2) {
+; CHECK-LABEL: multi_vector_qcvtn_x2_s16_s32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z3.d, z2.d
+; CHECK-NEXT: mov z2.d, z1.d
+; CHECK-NEXT: sqcvtn z0.h, { z2.s, z3.s }
+; CHECK-NEXT: ret
+ %res = call <vscale x 8 x i16> @llvm.aarch64.sve.sqcvtn.x2.nxv4i32(<vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2)
+ ret <vscale x 8 x i16> %res
+}
+
+;
+; UQCVTN
+;
+
+; x2
+define <vscale x 8 x i16> @multi_vector_qcvtn_x2_u16_u32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1) {
+; CHECK-LABEL: multi_vector_qcvtn_x2_u16_u32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z3.d, z2.d
+; CHECK-NEXT: mov z2.d, z1.d
+; CHECK-NEXT: uqcvtn z0.h, { z2.s, z3.s }
+; CHECK-NEXT: ret
+ %res = call <vscale x 8 x i16> @llvm.aarch64.sve.uqcvtn.x2.nxv4i32(<vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1)
+ ret<vscale x 8 x i16> %res
+}
+
+;
+; SQCVTUN
+;
+
+; x2
+define <vscale x 8 x i16 > @multi_vector_qcvtn_x2_s16_u32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2) {
+; CHECK-LABEL: multi_vector_qcvtn_x2_s16_u32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z3.d, z2.d
+; CHECK-NEXT: mov z2.d, z1.d
+; CHECK-NEXT: sqcvtun z0.h, { z2.s, z3.s }
+; CHECK-NEXT: ret
+ %res = call <vscale x 8 x i16> @llvm.aarch64.sve.sqcvtun.x2.nxv4i322(<vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2)
+ ret <vscale x 8 x i16> %res
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.uqcvtn.x2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqcvtn.x2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqcvtun.x2.nxv4i322(<vscale x 4 x i32>, <vscale x 4 x i32>)
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-sclamp.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-sclamp.ll
index 0475601f1907e..0e798d3b89a19 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-sclamp.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-sclamp.ll
@@ -1,9 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mattr=+sve2p1 -verify-machineinstrs < %s | FileCheck %s
target triple = "aarch64-linux-gnu"
-define <vscale x 16 x i8> @test_sclamp_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) #0 {
+define <vscale x 16 x i8> @test_sclamp_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
; CHECK-LABEL: test_sclamp_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: sclamp z0.b, z1.b, z2.b
@@ -12,7 +12,7 @@ define <vscale x 16 x i8> @test_sclamp_i8(<vscale x 16 x i8> %a, <vscale x 16 x
ret <vscale x 16 x i8> %res
}
-define <vscale x 8 x i16> @test_sclamp_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) #0 {
+define <vscale x 8 x i16> @test_sclamp_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
; CHECK-LABEL: test_sclamp_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: sclamp z0.h, z1.h, z2.h
@@ -21,7 +21,7 @@ define <vscale x 8 x i16> @test_sclamp_i16(<vscale x 8 x i16> %a, <vscale x 8 x
ret <vscale x 8 x i16> %res
}
-define <vscale x 4 x i32> @test_sclamp_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) #0 {
+define <vscale x 4 x i32> @test_sclamp_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
; CHECK-LABEL: test_sclamp_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: sclamp z0.s, z1.s, z2.s
@@ -30,7 +30,7 @@ define <vscale x 4 x i32> @test_sclamp_i32(<vscale x 4 x i32> %a, <vscale x 4 x
ret <vscale x 4 x i32> %res
}
-define <vscale x 2 x i64> @test_sclamp_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) #0 {
+define <vscale x 2 x i64> @test_sclamp_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
; CHECK-LABEL: test_sclamp_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: sclamp z0.d, z1.d, z2.d
@@ -39,117 +39,7 @@ define <vscale x 2 x i64> @test_sclamp_i64(<vscale x 2 x i64> %a, <vscale x 2 x
ret <vscale x 2 x i64> %res
}
-define { <vscale x 16 x i8>, <vscale x 16 x i8> } @test_sclamp_single_x2_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d) #1 {
-; CHECK-LABEL: test_sclamp_single_x2_i8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
-; CHECK-NEXT: sclamp { z0.b, z1.b }, z2.b, z3.b
-; CHECK-NEXT: ret
- %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.sclamp.single.x2.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d)
- ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %res
-}
-
-define { <vscale x 8 x i16>, <vscale x 8 x i16> } @test_sclamp_single_x2_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d) #1 {
-; CHECK-LABEL: test_sclamp_single_x2_i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
-; CHECK-NEXT: sclamp { z0.h, z1.h }, z2.h, z3.h
-; CHECK-NEXT: ret
- %res = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.sclamp.single.x2.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d)
- ret { <vscale x 8 x i16>, <vscale x 8 x i16> } %res
-}
-
-define { <vscale x 4 x i32>, <vscale x 4 x i32> } @test_sclamp_single_x2_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d) #1 {
-; CHECK-LABEL: test_sclamp_single_x2_i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
-; CHECK-NEXT: sclamp { z0.s, z1.s }, z2.s, z3.s
-; CHECK-NEXT: ret
- %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.sclamp.single.x2.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d)
- ret { <vscale x 4 x i32>, <vscale x 4 x i32> } %res
-}
-
-define { <vscale x 2 x i64>, <vscale x 2 x i64> } @test_sclamp_single_x2_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d) #1 {
-; CHECK-LABEL: test_sclamp_single_x2_i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
-; CHECK-NEXT: sclamp { z0.d, z1.d }, z2.d, z3.d
-; CHECK-NEXT: ret
- %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.sclamp.single.x2.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d)
- ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %res
-}
-
-define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @test_sclamp_single_x4_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d, <vscale x 16 x i8> %e, <vscale x 16 x i8> %f) #1 {
-; CHECK-LABEL: test_sclamp_single_x4_i8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: sclamp { z0.b - z3.b }, z4.b, z5.b
-; CHECK-NEXT: ret
- %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.sclamp.single.x4.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d, <vscale x 16 x i8> %e, <vscale x 16 x i8> %f)
- ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res
-}
-
-define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @test_sclamp_single_x4_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d, <vscale x 8 x i16> %e, <vscale x 8 x i16> %f) #1 {
-; CHECK-LABEL: test_sclamp_single_x4_i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: sclamp { z0.h - z3.h }, z4.h, z5.h
-; CHECK-NEXT: ret
- %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.sclamp.single.x4.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d, <vscale x 8 x i16> %e, <vscale x 8 x i16> %f)
- ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res
-}
-
-define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @test_sclamp_single_x4_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d, <vscale x 4 x i32> %e, <vscale x 4 x i32> %f) #1 {
-; CHECK-LABEL: test_sclamp_single_x4_i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: sclamp { z0.s - z3.s }, z4.s, z5.s
-; CHECK-NEXT: ret
- %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.sclamp.single.x4.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d, <vscale x 4 x i32> %e, <vscale x 4 x i32> %f)
- ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res
-}
-
-define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @test_sclamp_single_x4_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d, <vscale x 2 x i64> %e, <vscale x 2 x i64> %f) #1 {
-; CHECK-LABEL: test_sclamp_single_x4_i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: sclamp { z0.d - z3.d }, z4.d, z5.d
-; CHECK-NEXT: ret
- %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.sclamp.single.x4.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d, <vscale x 2 x i64> %e, <vscale x 2 x i64> %f)
- ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res
-}
-
-
-attributes #0 = { "target-features"="+sve2p1" }
-attributes #1 = { "target-features"="+sme2" "aarch64_pstate_sm_enabled" }
-
declare <vscale x 16 x i8> @llvm.aarch64.sve.sclamp.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sclamp.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sclamp.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sclamp.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-
-declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.sclamp.single.x2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.sclamp.single.x2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.sclamp.single.x2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.sclamp.single.x2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-
-declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.sclamp.single.x4.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d, <vscale x 16 x i8> %e, <vscale x 16 x i8> %f)
-declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.sclamp.single.x4.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d, <vscale x 8 x i16> %e, <vscale x 8 x i16> %f)
-declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.sclamp.single.x4.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d, <vscale x 4 x i32> %e, <vscale x 4 x i32> %f)
-declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.sclamp.single.x4.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d, <vscale x 2 x i64> %e, <vscale x 2 x i64> %f)
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uclamp.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uclamp.ll
index a39f6cfa3b4c5..dd278f4038b8d 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uclamp.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uclamp.ll
@@ -1,9 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mattr=+sve2p1 -verify-machineinstrs < %s | FileCheck %s
target triple = "aarch64-linux-gnu"
-define <vscale x 16 x i8> @test_uclamp_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) #0 {
+define <vscale x 16 x i8> @test_uclamp_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
; CHECK-LABEL: test_uclamp_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: uclamp z0.b, z1.b, z2.b
@@ -12,7 +12,7 @@ define <vscale x 16 x i8> @test_uclamp_i8(<vscale x 16 x i8> %a, <vscale x 16 x
ret <vscale x 16 x i8> %res
}
-define <vscale x 8 x i16> @test_uclamp_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) #0 {
+define <vscale x 8 x i16> @test_uclamp_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
; CHECK-LABEL: test_uclamp_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: uclamp z0.h, z1.h, z2.h
@@ -21,7 +21,7 @@ define <vscale x 8 x i16> @test_uclamp_i16(<vscale x 8 x i16> %a, <vscale x 8 x
ret <vscale x 8 x i16> %res
}
-define <vscale x 4 x i32> @test_uclamp_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) #0 {
+define <vscale x 4 x i32> @test_uclamp_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
; CHECK-LABEL: test_uclamp_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: uclamp z0.s, z1.s, z2.s
@@ -30,7 +30,7 @@ define <vscale x 4 x i32> @test_uclamp_i32(<vscale x 4 x i32> %a, <vscale x 4 x
ret <vscale x 4 x i32> %res
}
-define <vscale x 2 x i64> @test_uclamp_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) #0 {
+define <vscale x 2 x i64> @test_uclamp_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
; CHECK-LABEL: test_uclamp_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: uclamp z0.d, z1.d, z2.d
@@ -39,117 +39,7 @@ define <vscale x 2 x i64> @test_uclamp_i64(<vscale x 2 x i64> %a, <vscale x 2 x
ret <vscale x 2 x i64> %res
}
-define { <vscale x 16 x i8>, <vscale x 16 x i8> } @test_uclamp_single_x2_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d) #1 {
-; CHECK-LABEL: test_uclamp_single_x2_i8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
-; CHECK-NEXT: uclamp { z0.b, z1.b }, z2.b, z3.b
-; CHECK-NEXT: ret
- %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.uclamp.single.x2.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d)
- ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %res
-}
-
-define { <vscale x 8 x i16>, <vscale x 8 x i16> } @test_uclamp_single_x2_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d) #1 {
-; CHECK-LABEL: test_uclamp_single_x2_i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
-; CHECK-NEXT: uclamp { z0.h, z1.h }, z2.h, z3.h
-; CHECK-NEXT: ret
- %res = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.uclamp.single.x2.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d)
- ret { <vscale x 8 x i16>, <vscale x 8 x i16> } %res
-}
-
-define { <vscale x 4 x i32>, <vscale x 4 x i32> } @test_uclamp_single_x2_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d) #1 {
-; CHECK-LABEL: test_uclamp_single_x2_i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
-; CHECK-NEXT: uclamp { z0.s, z1.s }, z2.s, z3.s
-; CHECK-NEXT: ret
- %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.uclamp.single.x2.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d)
- ret { <vscale x 4 x i32>, <vscale x 4 x i32> } %res
-}
-
-define { <vscale x 2 x i64>, <vscale x 2 x i64> } @test_uclamp_single_x2_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d) #1 {
-; CHECK-LABEL: test_uclamp_single_x2_i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
-; CHECK-NEXT: uclamp { z0.d, z1.d }, z2.d, z3.d
-; CHECK-NEXT: ret
- %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.uclamp.single.x2.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d)
- ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %res
-}
-
-define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @test_uclamp_single_x4_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d, <vscale x 16 x i8> %e, <vscale x 16 x i8> %f) #1 {
-; CHECK-LABEL: test_uclamp_single_x4_i8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: uclamp { z0.b - z3.b }, z4.b, z5.b
-; CHECK-NEXT: ret
- %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.uclamp.single.x4.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d, <vscale x 16 x i8> %e, <vscale x 16 x i8> %f)
- ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res
-}
-
-define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @test_uclamp_single_x4_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d, <vscale x 8 x i16> %e, <vscale x 8 x i16> %f) #1 {
-; CHECK-LABEL: test_uclamp_single_x4_i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: uclamp { z0.h - z3.h }, z4.h, z5.h
-; CHECK-NEXT: ret
- %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.uclamp.single.x4.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d, <vscale x 8 x i16> %e, <vscale x 8 x i16> %f)
- ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res
-}
-
-define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @test_uclamp_single_x4_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d, <vscale x 4 x i32> %e, <vscale x 4 x i32> %f) #1 {
-; CHECK-LABEL: test_uclamp_single_x4_i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: uclamp { z0.s - z3.s }, z4.s, z5.s
-; CHECK-NEXT: ret
- %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.uclamp.single.x4.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d, <vscale x 4 x i32> %e, <vscale x 4 x i32> %f)
- ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res
-}
-
-define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @test_uclamp_single_x4_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d, <vscale x 2 x i64> %e, <vscale x 2 x i64> %f) #1 {
-; CHECK-LABEL: test_uclamp_single_x4_i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: uclamp { z0.d - z3.d }, z4.d, z5.d
-; CHECK-NEXT: ret
- %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.uclamp.single.x4.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d, <vscale x 2 x i64> %e, <vscale x 2 x i64> %f)
- ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res
-}
-
-
-attributes #0 = { "target-features"="+sve2p1" }
-attributes #1 = { "target-features"="+sme2" "aarch64_pstate_sm_enabled" }
-
declare <vscale x 16 x i8> @llvm.aarch64.sve.uclamp.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.uclamp.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uclamp.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uclamp.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-
-declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.uclamp.single.x2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.uclamp.single.x2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.uclamp.single.x2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.uclamp.single.x2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-
-declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.uclamp.single.x4.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d, <vscale x 16 x i8> %e, <vscale x 16 x i8> %f)
-declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.uclamp.single.x4.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d, <vscale x 8 x i16> %e, <vscale x 8 x i16> %f)
-declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.uclamp.single.x4.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d, <vscale x 4 x i32> %e, <vscale x 4 x i32> %f)
-declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.uclamp.single.x4.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d, <vscale x 2 x i64> %e, <vscale x 2 x i64> %f)
diff --git a/llvm/test/CodeGen/AArch64/sve2p2-intrinsics.ll b/llvm/test/CodeGen/AArch64/sve2p2-intrinsics.ll
index 8d863dab7b47d..23f40331a530f 100644
--- a/llvm/test/CodeGen/AArch64/sve2p2-intrinsics.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p2-intrinsics.ll
@@ -26,6 +26,26 @@ define <vscale x 8 x i16> @compact_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16>
ret <vscale x 8 x i16> %out
}
+define <vscale x 4 x i32> @compact_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: compact_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: compact z0.s, p0, z0.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.compact.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %a)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @compact_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
+; CHECK-LABEL: compact_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: compact z0.d, p0, z0.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.compact.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %a)
+ ret <vscale x 2 x i64> %out
+}
+
define <vscale x 8 x half> @compact_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
; CHECK-LABEL: compact_f16:
; CHECK: // %bb.0:
@@ -36,6 +56,26 @@ define <vscale x 8 x half> @compact_f16(<vscale x 8 x i1> %pg, <vscale x 8 x hal
ret <vscale x 8 x half> %out
}
+define <vscale x 4 x float> @compact_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
+; CHECK-LABEL: compact_f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: compact z0.s, p0, z0.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x float> @llvm.aarch64.sve.compact.nxv4f32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x float> %a)
+ ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @compact_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
+; CHECK-LABEL: compact_f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: compact z0.d, p0, z0.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x double> @llvm.aarch64.sve.compact.nxv2f64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x double> %a)
+ ret <vscale x 2 x double> %out
+}
+
define <vscale x 8 x bfloat> @compact_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a) {
; CHECK-LABEL: compact_bf16:
; CHECK: // %bb.0:
@@ -48,5 +88,9 @@ define <vscale x 8 x bfloat> @compact_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x
declare <vscale x 16 x i8> @llvm.aarch64.sve.compact.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.compact.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.compact.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.compact.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)
declare <vscale x 8 x half> @llvm.aarch64.sve.compact.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.compact.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.compact.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.compact.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>)