[clang] [llvm] [AArch64][SVE] Lower svrev_* to llvm.vector.reverse (PR #116422)
Jorge Botto via cfe-commits
cfe-commits at lists.llvm.org
Sun Feb 9 02:55:25 PST 2025
https://github.com/jf-botto updated https://github.com/llvm/llvm-project/pull/116422
From 75cc7d90fa8a7f0cde0df969577556ac1098256b Mon Sep 17 00:00:00 2001
From: Jorge Botto <jorge.botto.16 at ucl.ac.uk>
Date: Fri, 15 Nov 2024 18:56:54 +0000
Subject: [PATCH 1/4] Making Clang emit llvm.vector.reverse instead of
llvm.aarch64.sve.rev
---
clang/include/clang/Basic/arm_sve.td | 2 +-
.../AArch64/sve-intrinsics/acle_sve_rev.c | 44 +++++++++----------
2 files changed, 23 insertions(+), 23 deletions(-)
diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index b20383e72e66a37..c954a6582171728 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -1060,7 +1060,7 @@ let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in {
def SVEXT : SInst<"svext[_{d}]", "dddi", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ext", [VerifyRuntimeMode], [ImmCheck<2, ImmCheckExtract, 1>]>;
defm SVLASTA : SVEPerm<"svlasta[_{d}]", "sPd", "aarch64_sve_lasta">;
defm SVLASTB : SVEPerm<"svlastb[_{d}]", "sPd", "aarch64_sve_lastb">;
-def SVREV : SInst<"svrev[_{d}]", "dd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_rev", [VerifyRuntimeMode]>;
+def SVREV : SInst<"svrev[_{d}]", "dd", "csilUcUsUiUlhfd", MergeNone, "vector_reverse", [VerifyRuntimeMode]>;
def SVSEL : SInst<"svsel[_{d}]", "dPdd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_sel", [VerifyRuntimeMode]>;
def SVSPLICE : SInst<"svsplice[_{d}]", "dPdd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_splice", [VerifyRuntimeMode]>;
def SVTBL : SInst<"svtbl[_{d}]", "ddu", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_tbl", [VerifyRuntimeMode]>;
diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rev.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rev.c
index 3c0ae7df79644fa..835d1c616aebcb0 100644
--- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rev.c
+++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rev.c
@@ -24,12 +24,12 @@
// CHECK-LABEL: @test_svrev_s8(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.rev.nxv16i8(<vscale x 16 x i8> [[OP:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.reverse.nxv16i8(<vscale x 16 x i8> [[OP:%.*]])
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z13test_svrev_s8u10__SVInt8_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.rev.nxv16i8(<vscale x 16 x i8> [[OP:%.*]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.reverse.nxv16i8(<vscale x 16 x i8> [[OP:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
svint8_t test_svrev_s8(svint8_t op) MODE_ATTR
@@ -39,12 +39,12 @@ svint8_t test_svrev_s8(svint8_t op) MODE_ATTR
// CHECK-LABEL: @test_svrev_s16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.rev.nxv8i16(<vscale x 8 x i16> [[OP:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.reverse.nxv8i16(<vscale x 8 x i16> [[OP:%.*]])
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z14test_svrev_s16u11__SVInt16_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.rev.nxv8i16(<vscale x 8 x i16> [[OP:%.*]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.reverse.nxv8i16(<vscale x 8 x i16> [[OP:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
svint16_t test_svrev_s16(svint16_t op) MODE_ATTR
@@ -54,12 +54,12 @@ svint16_t test_svrev_s16(svint16_t op) MODE_ATTR
// CHECK-LABEL: @test_svrev_s32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.rev.nxv4i32(<vscale x 4 x i32> [[OP:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[OP:%.*]])
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z14test_svrev_s32u11__SVInt32_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.rev.nxv4i32(<vscale x 4 x i32> [[OP:%.*]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[OP:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
svint32_t test_svrev_s32(svint32_t op) MODE_ATTR
@@ -69,12 +69,12 @@ svint32_t test_svrev_s32(svint32_t op) MODE_ATTR
// CHECK-LABEL: @test_svrev_s64(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.rev.nxv2i64(<vscale x 2 x i64> [[OP:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.reverse.nxv2i64(<vscale x 2 x i64> [[OP:%.*]])
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z14test_svrev_s64u11__SVInt64_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.rev.nxv2i64(<vscale x 2 x i64> [[OP:%.*]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.reverse.nxv2i64(<vscale x 2 x i64> [[OP:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
svint64_t test_svrev_s64(svint64_t op) MODE_ATTR
@@ -84,12 +84,12 @@ svint64_t test_svrev_s64(svint64_t op) MODE_ATTR
// CHECK-LABEL: @test_svrev_u8(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.rev.nxv16i8(<vscale x 16 x i8> [[OP:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.reverse.nxv16i8(<vscale x 16 x i8> [[OP:%.*]])
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z13test_svrev_u8u11__SVUint8_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.rev.nxv16i8(<vscale x 16 x i8> [[OP:%.*]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.reverse.nxv16i8(<vscale x 16 x i8> [[OP:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
svuint8_t test_svrev_u8(svuint8_t op) MODE_ATTR
@@ -99,12 +99,12 @@ svuint8_t test_svrev_u8(svuint8_t op) MODE_ATTR
// CHECK-LABEL: @test_svrev_u16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.rev.nxv8i16(<vscale x 8 x i16> [[OP:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.reverse.nxv8i16(<vscale x 8 x i16> [[OP:%.*]])
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z14test_svrev_u16u12__SVUint16_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.rev.nxv8i16(<vscale x 8 x i16> [[OP:%.*]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.reverse.nxv8i16(<vscale x 8 x i16> [[OP:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
svuint16_t test_svrev_u16(svuint16_t op) MODE_ATTR
@@ -114,12 +114,12 @@ svuint16_t test_svrev_u16(svuint16_t op) MODE_ATTR
// CHECK-LABEL: @test_svrev_u32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.rev.nxv4i32(<vscale x 4 x i32> [[OP:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[OP:%.*]])
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z14test_svrev_u32u12__SVUint32_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.rev.nxv4i32(<vscale x 4 x i32> [[OP:%.*]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[OP:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
svuint32_t test_svrev_u32(svuint32_t op) MODE_ATTR
@@ -129,12 +129,12 @@ svuint32_t test_svrev_u32(svuint32_t op) MODE_ATTR
// CHECK-LABEL: @test_svrev_u64(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.rev.nxv2i64(<vscale x 2 x i64> [[OP:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.reverse.nxv2i64(<vscale x 2 x i64> [[OP:%.*]])
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z14test_svrev_u64u12__SVUint64_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.rev.nxv2i64(<vscale x 2 x i64> [[OP:%.*]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.reverse.nxv2i64(<vscale x 2 x i64> [[OP:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
svuint64_t test_svrev_u64(svuint64_t op) MODE_ATTR
@@ -144,12 +144,12 @@ svuint64_t test_svrev_u64(svuint64_t op) MODE_ATTR
// CHECK-LABEL: @test_svrev_f16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.rev.nxv8f16(<vscale x 8 x half> [[OP:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.reverse.nxv8f16(<vscale x 8 x half> [[OP:%.*]])
// CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z14test_svrev_f16u13__SVFloat16_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.rev.nxv8f16(<vscale x 8 x half> [[OP:%.*]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.reverse.nxv8f16(<vscale x 8 x half> [[OP:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
//
svfloat16_t test_svrev_f16(svfloat16_t op) MODE_ATTR
@@ -159,12 +159,12 @@ svfloat16_t test_svrev_f16(svfloat16_t op) MODE_ATTR
// CHECK-LABEL: @test_svrev_f32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.rev.nxv4f32(<vscale x 4 x float> [[OP:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> [[OP:%.*]])
// CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z14test_svrev_f32u13__SVFloat32_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.rev.nxv4f32(<vscale x 4 x float> [[OP:%.*]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> [[OP:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
//
svfloat32_t test_svrev_f32(svfloat32_t op) MODE_ATTR
@@ -174,12 +174,12 @@ svfloat32_t test_svrev_f32(svfloat32_t op) MODE_ATTR
// CHECK-LABEL: @test_svrev_f64(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.rev.nxv2f64(<vscale x 2 x double> [[OP:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.vector.reverse.nxv2f64(<vscale x 2 x double> [[OP:%.*]])
// CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z14test_svrev_f64u13__SVFloat64_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.rev.nxv2f64(<vscale x 2 x double> [[OP:%.*]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.vector.reverse.nxv2f64(<vscale x 2 x double> [[OP:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
//
svfloat64_t test_svrev_f64(svfloat64_t op) MODE_ATTR
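
For context: a minimal C sketch (illustrative only, not part of the patch; assumes arm_sve.h and an SVE-enabled target, and the function name is made up) of the source-level intrinsic the CHECK lines above exercise. With this patch Clang emits the target-independent @llvm.vector.reverse intrinsic for svrev instead of the AArch64-specific @llvm.aarch64.sve.rev, so generic IR-level folds can see through it.

  #include <arm_sve.h>

  /* Illustrative only: svrev_s32 reverses the lanes of a scalable vector.
     After this change it lowers to @llvm.vector.reverse.nxv4i32 rather than
     @llvm.aarch64.sve.rev.nxv4i32 (see the CHECK lines above). */
  svint32_t reverse_lanes(svint32_t op) {
    return svrev_s32(op);
  }
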
From 8386dde658d8572d943a09661e584af9f71b1158 Mon Sep 17 00:00:00 2001
From: Jorge Botto <jorge.botto.16 at ucl.ac.uk>
Date: Mon, 27 Jan 2025 20:21:17 +0000
Subject: [PATCH 2/4] Making Clang emit llvm.vector.reverse instead of
llvm.aarch64.sve.rev for svrev_bf16 and svrev_b8
---
clang/include/clang/Basic/arm_sve.td | 4 ++--
.../test/CodeGen/AArch64/sve-intrinsics/acle_sve_rev-bfloat.c | 4 ++--
clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rev.c | 4 ++--
3 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index c954a6582171728..5b7c64490fff4ec 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -1082,7 +1082,7 @@ def SVZIP2 : SInst<"svzip2[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNo
let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in {
def SVEXT_BF16 : SInst<"svext[_{d}]", "dddi", "b", MergeNone, "aarch64_sve_ext", [VerifyRuntimeMode], [ImmCheck<2, ImmCheckExtract, 1>]>;
-def SVREV_BF16 : SInst<"svrev[_{d}]", "dd", "b", MergeNone, "aarch64_sve_rev", [VerifyRuntimeMode]>;
+def SVREV_BF16 : SInst<"svrev[_{d}]", "dd", "b", MergeNone, "vector_reverse", [VerifyRuntimeMode]>;
def SVSEL_BF16 : SInst<"svsel[_{d}]", "dPdd", "b", MergeNone, "aarch64_sve_sel", [VerifyRuntimeMode]>;
def SVSPLICE_BF16 : SInst<"svsplice[_{d}]", "dPdd", "b", MergeNone, "aarch64_sve_splice", [VerifyRuntimeMode]>;
def SVTRN1_BF16 : SInst<"svtrn1[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_trn1", [VerifyRuntimeMode]>;
@@ -1093,7 +1093,7 @@ def SVZIP1_BF16 : SInst<"svzip1[_{d}]", "ddd", "b", MergeNone, "aarch64_sve
def SVZIP2_BF16 : SInst<"svzip2[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_zip2", [VerifyRuntimeMode]>;
}
-def SVREV_B8 : SInst<"svrev_b8", "PP", "Pc", MergeNone, "aarch64_sve_rev", [VerifyRuntimeMode]>;
+def SVREV_B8 : SInst<"svrev_b8", "PP", "Pc", MergeNone, "vector_reverse", [VerifyRuntimeMode]>;
def SVREV_B16 : SInst<"svrev_b16", "PP", "Pc", MergeNone, "aarch64_sve_rev_b16", [IsOverloadNone, VerifyRuntimeMode]>;
def SVREV_B32 : SInst<"svrev_b32", "PP", "Pc", MergeNone, "aarch64_sve_rev_b32", [IsOverloadNone, VerifyRuntimeMode]>;
def SVREV_B64 : SInst<"svrev_b64", "PP", "Pc", MergeNone, "aarch64_sve_rev_b64", [IsOverloadNone, VerifyRuntimeMode]>;
diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rev-bfloat.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rev-bfloat.c
index 9b3e813fa969472..43c0da842001c88 100644
--- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rev-bfloat.c
+++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rev-bfloat.c
@@ -25,12 +25,12 @@
// CHECK-LABEL: @test_svrev_bf16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.rev.nxv8bf16(<vscale x 8 x bfloat> [[OP:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.reverse.nxv8bf16(<vscale x 8 x bfloat> [[OP:%.*]])
// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z15test_svrev_bf16u14__SVBfloat16_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.rev.nxv8bf16(<vscale x 8 x bfloat> [[OP:%.*]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.reverse.nxv8bf16(<vscale x 8 x bfloat> [[OP:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
//
svbfloat16_t test_svrev_bf16(svbfloat16_t op) MODE_ATTR
diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rev.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rev.c
index 835d1c616aebcb0..856f76e67afcf5a 100644
--- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rev.c
+++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rev.c
@@ -189,12 +189,12 @@ svfloat64_t test_svrev_f64(svfloat64_t op) MODE_ATTR
// CHECK-LABEL: @test_svrev_b8(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.rev.nxv16i1(<vscale x 16 x i1> [[OP:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.vector.reverse.nxv16i1(<vscale x 16 x i1> [[OP:%.*]])
// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z13test_svrev_b8u10__SVBool_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.rev.nxv16i1(<vscale x 16 x i1> [[OP:%.*]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.vector.reverse.nxv16i1(<vscale x 16 x i1> [[OP:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
//
svbool_t test_svrev_b8(svbool_t op) MODE_ATTR
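
For context: this second patch applies the same change to the bfloat16 and 8-bit predicate forms. A minimal C sketch (illustrative only, not part of the patch; assumes arm_sve.h, and the function name is made up):

  #include <arm_sve.h>

  /* Illustrative only: svrev_b8 reverses a 16 x i1 predicate; it now lowers
     to @llvm.vector.reverse.nxv16i1 instead of @llvm.aarch64.sve.rev.nxv16i1,
     matching the CHECK lines above. */
  svbool_t reverse_pred(svbool_t pg) {
    return svrev_b8(pg);
  }
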
From a10ac09fafbafe5fc3798437a5ebc940bf81022e Mon Sep 17 00:00:00 2001
From: Jorge Botto <jorge.botto.16 at ucl.ac.uk>
Date: Wed, 5 Feb 2025 00:00:34 +0000
Subject: [PATCH 3/4] Precommitting missing optimisation tests
---
llvm/test/CodeGen/AArch64/aarch64_sve_rev.ll | 182 +++++++++++++++++++
1 file changed, 182 insertions(+)
create mode 100644 llvm/test/CodeGen/AArch64/aarch64_sve_rev.ll
diff --git a/llvm/test/CodeGen/AArch64/aarch64_sve_rev.ll b/llvm/test/CodeGen/AArch64/aarch64_sve_rev.ll
new file mode 100644
index 000000000000000..8455f2e5118ef7b
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/aarch64_sve_rev.ll
@@ -0,0 +1,182 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming -verify-machineinstrs < %s | FileCheck %s
+
+define <vscale x 16 x i1> @aarch64_sve_rev_inv(<vscale x 16 x i1> %0) {
+; CHECK-LABEL: aarch64_sve_rev_inv:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: rev p0.b, p0.b
+; CHECK-NEXT: rev p0.b, p0.b
+; CHECK-NEXT: ret
+entry:
+ %1 = call <vscale x 16 x i1> @llvm.aarch64.sve.rev(<vscale x 16 x i1> %0)
+ %2 = call <vscale x 16 x i1> @llvm.aarch64.sve.rev(<vscale x 16 x i1> %1)
+ ret <vscale x 16 x i1> %2
+}
+
+define <vscale x 16 x i1> @aarch64_sve_rev_b16_inv(<vscale x 16 x i1> %0) {
+; CHECK-LABEL: aarch64_sve_rev_b16_inv:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: rev p0.h, p0.h
+; CHECK-NEXT: rev p0.h, p0.h
+; CHECK-NEXT: ret
+entry:
+ %1 = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b16(<vscale x 16 x i1> %0)
+ %2 = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b16(<vscale x 16 x i1> %1)
+ ret <vscale x 16 x i1> %2
+}
+
+define <vscale x 16 x i1> @aarch64_sve_rev_b32_inv(<vscale x 16 x i1> %0) {
+; CHECK-LABEL: aarch64_sve_rev_b32_inv:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: rev p0.s, p0.s
+; CHECK-NEXT: rev p0.s, p0.s
+; CHECK-NEXT: ret
+entry:
+ %1 = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b32(<vscale x 16 x i1> %0)
+ %2 = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b32(<vscale x 16 x i1> %1)
+ ret <vscale x 16 x i1> %2
+}
+
+define <vscale x 16 x i1> @aarch64_sve_rev_b64_inv(<vscale x 16 x i1> %0) {
+; CHECK-LABEL: aarch64_sve_rev_b64_inv:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: rev p0.d, p0.d
+; CHECK-NEXT: rev p0.d, p0.d
+; CHECK-NEXT: ret
+entry:
+ %1 = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b64(<vscale x 16 x i1> %0)
+ %2 = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b64(<vscale x 16 x i1> %1)
+ ret <vscale x 16 x i1> %2
+}
+
+define <vscale x 4 x i32> @aarch64_sve_revb_inv(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: aarch64_sve_revb_inv:
+; CHECK: // %bb.0:
+; CHECK-NEXT: revb z0.s, p0/m, z1.s
+; CHECK-NEXT: revb z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+ %1 = call <vscale x 4 x i32> @llvm.aarch64.sve.revb.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
+ %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.revb.nxv4i32(<vscale x 4 x i32> %1, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 16 x i8> @aarch64_sve_revd_inv(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: aarch64_sve_revd_inv:
+; CHECK: // %bb.0:
+; CHECK-NEXT: revd z0.q, p0/m, z1.q
+; CHECK-NEXT: revd z0.q, p0/m, z1.q
+; CHECK-NEXT: ret
+ %1 = call <vscale x 16 x i8> @llvm.aarch64.sve.revd.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b)
+ %2 = call <vscale x 16 x i8> @llvm.aarch64.sve.revd.nxv16i8(<vscale x 16 x i8> %1, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i8> %2
+}
+
+define <vscale x 4 x i32> @aarch64_sve_revh_inv(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: aarch64_sve_revh_inv:
+; CHECK: // %bb.0:
+; CHECK-NEXT: revh z0.s, p0/m, z1.s
+; CHECK-NEXT: revh z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+ %1 = call <vscale x 4 x i32> @llvm.aarch64.sve.revh.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
+ %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.revh.nxv4i32(<vscale x 4 x i32> %1, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 2 x i64> @aarch64_sve_revw_inv(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: aarch64_sve_revw_inv:
+; CHECK: // %bb.0:
+; CHECK-NEXT: revw z0.d, p0/m, z1.d
+; CHECK-NEXT: revw z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+ %1 = call <vscale x 2 x i64> @llvm.aarch64.sve.revw.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
+ %2 = call <vscale x 2 x i64> @llvm.aarch64.sve.revw.nxv2i64(<vscale x 2 x i64> %1, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %2
+}
+
+define <vscale x 4 x i32> @test_aarch64_sve_revb_pg_mismatch(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i1> %pg1, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: test_aarch64_sve_revb_pg_mismatch:
+; CHECK: // %bb.0:
+; CHECK-NEXT: revb z0.s, p0/m, z1.s
+; CHECK-NEXT: revb z0.s, p1/m, z1.s
+; CHECK-NEXT: ret
+ %1 = call <vscale x 4 x i32> @llvm.aarch64.sve.revb.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
+ %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.revb.nxv4i32(<vscale x 4 x i32> %1, <vscale x 4 x i1> %pg1, <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @test_aarch64_sve_revb_b_mismatch(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b, <vscale x 4 x i32> %b1) {
+; CHECK-LABEL: test_aarch64_sve_revb_b_mismatch:
+; CHECK: // %bb.0:
+; CHECK-NEXT: revb z0.s, p0/m, z1.s
+; CHECK-NEXT: revb z0.s, p0/m, z2.s
+; CHECK-NEXT: ret
+ %1 = call <vscale x 4 x i32> @llvm.aarch64.sve.revb.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
+ %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.revb.nxv4i32(<vscale x 4 x i32> %1, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b1)
+ ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 16 x i8> @test_aarch64_sve_revd_pg_mismatch(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i1> %pg1, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: test_aarch64_sve_revd_pg_mismatch:
+; CHECK: // %bb.0:
+; CHECK-NEXT: revd z0.q, p0/m, z1.q
+; CHECK-NEXT: revd z0.q, p1/m, z1.q
+; CHECK-NEXT: ret
+ %1 = call <vscale x 16 x i8> @llvm.aarch64.sve.revd.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b)
+ %2 = call <vscale x 16 x i8> @llvm.aarch64.sve.revd.nxv16i8(<vscale x 16 x i8> %1, <vscale x 16 x i1> %pg1, <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i8> %2
+}
+
+define <vscale x 16 x i8> @test_aarch64_sve_revd_b_mismatch(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b, <vscale x 16 x i8> %b1) {
+; CHECK-LABEL: test_aarch64_sve_revd_b_mismatch:
+; CHECK: // %bb.0:
+; CHECK-NEXT: revd z0.q, p0/m, z1.q
+; CHECK-NEXT: revd z0.q, p0/m, z2.q
+; CHECK-NEXT: ret
+ %1 = call <vscale x 16 x i8> @llvm.aarch64.sve.revd.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b)
+ %2 = call <vscale x 16 x i8> @llvm.aarch64.sve.revd.nxv16i8(<vscale x 16 x i8> %1, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b1)
+ ret <vscale x 16 x i8> %2
+}
+
+define <vscale x 4 x i32> @test_aarch64_sve_revh_pg_mismatch(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i1> %pg1, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: test_aarch64_sve_revh_pg_mismatch:
+; CHECK: // %bb.0:
+; CHECK-NEXT: revh z0.s, p0/m, z1.s
+; CHECK-NEXT: revh z0.s, p1/m, z1.s
+; CHECK-NEXT: ret
+ %1 = call <vscale x 4 x i32> @llvm.aarch64.sve.revh.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
+ %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.revh.nxv4i32(<vscale x 4 x i32> %1, <vscale x 4 x i1> %pg1, <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @test_aarch64_sve_revh_b_mismatch(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b, <vscale x 4 x i32> %b1) {
+; CHECK-LABEL: test_aarch64_sve_revh_b_mismatch:
+; CHECK: // %bb.0:
+; CHECK-NEXT: revh z0.s, p0/m, z1.s
+; CHECK-NEXT: revh z0.s, p0/m, z2.s
+; CHECK-NEXT: ret
+ %1 = call <vscale x 4 x i32> @llvm.aarch64.sve.revh.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
+ %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.revh.nxv4i32(<vscale x 4 x i32> %1, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b1)
+ ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 2 x i64> @test_aarch64_sve_revw_pg_mismatch(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i1> %pg1, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: test_aarch64_sve_revw_pg_mismatch:
+; CHECK: // %bb.0:
+; CHECK-NEXT: revw z0.d, p0/m, z1.d
+; CHECK-NEXT: revw z0.d, p1/m, z1.d
+; CHECK-NEXT: ret
+ %1 = call <vscale x 2 x i64> @llvm.aarch64.sve.revw.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
+ %2 = call <vscale x 2 x i64> @llvm.aarch64.sve.revw.nxv2i64(<vscale x 2 x i64> %1, <vscale x 2 x i1> %pg1, <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %2
+}
+
+define <vscale x 2 x i64> @test_aarch64_sve_revw_b_mismatch(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b, <vscale x 2 x i64> %b1) {
+; CHECK-LABEL: test_aarch64_sve_revw_b_mismatch:
+; CHECK: // %bb.0:
+; CHECK-NEXT: revw z0.d, p0/m, z1.d
+; CHECK-NEXT: revw z0.d, p0/m, z2.d
+; CHECK-NEXT: ret
+ %1 = call <vscale x 2 x i64> @llvm.aarch64.sve.revw.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
+ %2 = call <vscale x 2 x i64> @llvm.aarch64.sve.revw.nxv2i64(<vscale x 2 x i64> %1, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b1)
+ ret <vscale x 2 x i64> %2
+}
From 5a1829ff82467df54ef4f3dcbb65190a1aaa8fdd Mon Sep 17 00:00:00 2001
From: Jorge Botto <jorge.botto.16 at ucl.ac.uk>
Date: Thu, 6 Feb 2025 01:04:42 +0000
Subject: [PATCH 4/4] Adding missed optimisation
---
.../Target/AArch64/AArch64ISelLowering.cpp | 38 +++++++++++++++++++
llvm/test/CodeGen/AArch64/aarch64_sve_rev.ll | 24 ++++--------
2 files changed, 46 insertions(+), 16 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 34464d317beafe4..323fbde74bf1974 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -21962,6 +21962,35 @@ SDValue tryLowerPartialReductionToWideAdd(SDNode *N,
return DAG.getNode(TopOpcode, DL, AccVT, BottomNode, ExtOp);
}
+static SDValue foldRevInvolution(SDNode *N) {
+ SDValue InnerRev = N->getOperand(1);
+ if (!InnerRev.hasOneUse())
+ return SDValue();
+
+ unsigned OuterIId = getIntrinsicID(N);
+ unsigned InnerIId = getIntrinsicID(InnerRev.getNode());
+ if (OuterIId != InnerIId)
+ return SDValue();
+
+ switch (OuterIId) {
+ case Intrinsic::aarch64_sve_revb:
+ case Intrinsic::aarch64_sve_revd:
+ case Intrinsic::aarch64_sve_revh:
+ case Intrinsic::aarch64_sve_revw:
+ if (N->getOperand(2) != InnerRev.getOperand(2) ||
+ N->getOperand(3) != InnerRev.getOperand(3))
+ return SDValue();
+ [[fallthrough]];
+ case Intrinsic::aarch64_sve_rev:
+ case Intrinsic::aarch64_sve_rev_b16:
+ case Intrinsic::aarch64_sve_rev_b32:
+ case Intrinsic::aarch64_sve_rev_b64:
+ return InnerRev.getOperand(1);
+ default:
+ return SDValue();
+ }
+}
+
static SDValue performIntrinsicCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
@@ -22270,6 +22299,15 @@ static SDValue performIntrinsicCombine(SDNode *N,
return tryConvertSVEWideCompare(N, ISD::SETULT, DCI, DAG);
case Intrinsic::aarch64_sve_cmpls_wide:
return tryConvertSVEWideCompare(N, ISD::SETULE, DCI, DAG);
+ case Intrinsic::aarch64_sve_rev:
+ case Intrinsic::aarch64_sve_rev_b16:
+ case Intrinsic::aarch64_sve_rev_b32:
+ case Intrinsic::aarch64_sve_rev_b64:
+ case Intrinsic::aarch64_sve_revb:
+ case Intrinsic::aarch64_sve_revd:
+ case Intrinsic::aarch64_sve_revh:
+ case Intrinsic::aarch64_sve_revw:
+ return foldRevInvolution(N);
case Intrinsic::aarch64_sve_ptest_any:
return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
AArch64CC::ANY_ACTIVE);
diff --git a/llvm/test/CodeGen/AArch64/aarch64_sve_rev.ll b/llvm/test/CodeGen/AArch64/aarch64_sve_rev.ll
index 8455f2e5118ef7b..984845363501b24 100644
--- a/llvm/test/CodeGen/AArch64/aarch64_sve_rev.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64_sve_rev.ll
@@ -4,8 +4,6 @@
define <vscale x 16 x i1> @aarch64_sve_rev_inv(<vscale x 16 x i1> %0) {
; CHECK-LABEL: aarch64_sve_rev_inv:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: rev p0.b, p0.b
-; CHECK-NEXT: rev p0.b, p0.b
; CHECK-NEXT: ret
entry:
%1 = call <vscale x 16 x i1> @llvm.aarch64.sve.rev(<vscale x 16 x i1> %0)
@@ -16,8 +14,6 @@ entry:
define <vscale x 16 x i1> @aarch64_sve_rev_b16_inv(<vscale x 16 x i1> %0) {
; CHECK-LABEL: aarch64_sve_rev_b16_inv:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: rev p0.h, p0.h
-; CHECK-NEXT: rev p0.h, p0.h
; CHECK-NEXT: ret
entry:
%1 = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b16(<vscale x 16 x i1> %0)
@@ -28,8 +24,6 @@ entry:
define <vscale x 16 x i1> @aarch64_sve_rev_b32_inv(<vscale x 16 x i1> %0) {
; CHECK-LABEL: aarch64_sve_rev_b32_inv:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: rev p0.s, p0.s
-; CHECK-NEXT: rev p0.s, p0.s
; CHECK-NEXT: ret
entry:
%1 = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b32(<vscale x 16 x i1> %0)
@@ -40,8 +34,6 @@ entry:
define <vscale x 16 x i1> @aarch64_sve_rev_b64_inv(<vscale x 16 x i1> %0) {
; CHECK-LABEL: aarch64_sve_rev_b64_inv:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: rev p0.d, p0.d
-; CHECK-NEXT: rev p0.d, p0.d
; CHECK-NEXT: ret
entry:
%1 = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b64(<vscale x 16 x i1> %0)
@@ -52,8 +44,6 @@ entry:
define <vscale x 4 x i32> @aarch64_sve_revb_inv(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
; CHECK-LABEL: aarch64_sve_revb_inv:
; CHECK: // %bb.0:
-; CHECK-NEXT: revb z0.s, p0/m, z1.s
-; CHECK-NEXT: revb z0.s, p0/m, z1.s
; CHECK-NEXT: ret
%1 = call <vscale x 4 x i32> @llvm.aarch64.sve.revb.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
%2 = call <vscale x 4 x i32> @llvm.aarch64.sve.revb.nxv4i32(<vscale x 4 x i32> %1, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
@@ -63,8 +53,6 @@ define <vscale x 4 x i32> @aarch64_sve_revb_inv(<vscale x 4 x i32> %a, <vscale x
define <vscale x 16 x i8> @aarch64_sve_revd_inv(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b) {
; CHECK-LABEL: aarch64_sve_revd_inv:
; CHECK: // %bb.0:
-; CHECK-NEXT: revd z0.q, p0/m, z1.q
-; CHECK-NEXT: revd z0.q, p0/m, z1.q
; CHECK-NEXT: ret
%1 = call <vscale x 16 x i8> @llvm.aarch64.sve.revd.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b)
%2 = call <vscale x 16 x i8> @llvm.aarch64.sve.revd.nxv16i8(<vscale x 16 x i8> %1, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b)
@@ -74,8 +62,6 @@ define <vscale x 16 x i8> @aarch64_sve_revd_inv(<vscale x 16 x i8> %a, <vscale x
define <vscale x 4 x i32> @aarch64_sve_revh_inv(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
; CHECK-LABEL: aarch64_sve_revh_inv:
; CHECK: // %bb.0:
-; CHECK-NEXT: revh z0.s, p0/m, z1.s
-; CHECK-NEXT: revh z0.s, p0/m, z1.s
; CHECK-NEXT: ret
%1 = call <vscale x 4 x i32> @llvm.aarch64.sve.revh.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
%2 = call <vscale x 4 x i32> @llvm.aarch64.sve.revh.nxv4i32(<vscale x 4 x i32> %1, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
@@ -85,14 +71,13 @@ define <vscale x 4 x i32> @aarch64_sve_revh_inv(<vscale x 4 x i32> %a, <vscale x
define <vscale x 2 x i64> @aarch64_sve_revw_inv(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) {
; CHECK-LABEL: aarch64_sve_revw_inv:
; CHECK: // %bb.0:
-; CHECK-NEXT: revw z0.d, p0/m, z1.d
-; CHECK-NEXT: revw z0.d, p0/m, z1.d
; CHECK-NEXT: ret
%1 = call <vscale x 2 x i64> @llvm.aarch64.sve.revw.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
%2 = call <vscale x 2 x i64> @llvm.aarch64.sve.revw.nxv2i64(<vscale x 2 x i64> %1, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
ret <vscale x 2 x i64> %2
}
+; negative test
define <vscale x 4 x i32> @test_aarch64_sve_revb_pg_mismatch(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i1> %pg1, <vscale x 4 x i32> %b) {
; CHECK-LABEL: test_aarch64_sve_revb_pg_mismatch:
; CHECK: // %bb.0:
@@ -104,6 +89,7 @@ define <vscale x 4 x i32> @test_aarch64_sve_revb_pg_mismatch(<vscale x 4 x i32>
ret <vscale x 4 x i32> %2
}
+; negative test
define <vscale x 4 x i32> @test_aarch64_sve_revb_b_mismatch(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b, <vscale x 4 x i32> %b1) {
; CHECK-LABEL: test_aarch64_sve_revb_b_mismatch:
; CHECK: // %bb.0:
@@ -115,6 +101,7 @@ define <vscale x 4 x i32> @test_aarch64_sve_revb_b_mismatch(<vscale x 4 x i32> %
ret <vscale x 4 x i32> %2
}
+; negative test
define <vscale x 16 x i8> @test_aarch64_sve_revd_pg_mismatch(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i1> %pg1, <vscale x 16 x i8> %b) {
; CHECK-LABEL: test_aarch64_sve_revd_pg_mismatch:
; CHECK: // %bb.0:
@@ -126,6 +113,7 @@ define <vscale x 16 x i8> @test_aarch64_sve_revd_pg_mismatch(<vscale x 16 x i8>
ret <vscale x 16 x i8> %2
}
+; negative test
define <vscale x 16 x i8> @test_aarch64_sve_revd_b_mismatch(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b, <vscale x 16 x i8> %b1) {
; CHECK-LABEL: test_aarch64_sve_revd_b_mismatch:
; CHECK: // %bb.0:
@@ -137,6 +125,7 @@ define <vscale x 16 x i8> @test_aarch64_sve_revd_b_mismatch(<vscale x 16 x i8> %
ret <vscale x 16 x i8> %2
}
+; negative test
define <vscale x 4 x i32> @test_aarch64_sve_revh_pg_mismatch(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i1> %pg1, <vscale x 4 x i32> %b) {
; CHECK-LABEL: test_aarch64_sve_revh_pg_mismatch:
; CHECK: // %bb.0:
@@ -148,6 +137,7 @@ define <vscale x 4 x i32> @test_aarch64_sve_revh_pg_mismatch(<vscale x 4 x i32>
ret <vscale x 4 x i32> %2
}
+; negative test
define <vscale x 4 x i32> @test_aarch64_sve_revh_b_mismatch(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b, <vscale x 4 x i32> %b1) {
; CHECK-LABEL: test_aarch64_sve_revh_b_mismatch:
; CHECK: // %bb.0:
@@ -159,6 +149,7 @@ define <vscale x 4 x i32> @test_aarch64_sve_revh_b_mismatch(<vscale x 4 x i32> %
ret <vscale x 4 x i32> %2
}
+; negative test
define <vscale x 2 x i64> @test_aarch64_sve_revw_pg_mismatch(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i1> %pg1, <vscale x 2 x i64> %b) {
; CHECK-LABEL: test_aarch64_sve_revw_pg_mismatch:
; CHECK: // %bb.0:
@@ -170,6 +161,7 @@ define <vscale x 2 x i64> @test_aarch64_sve_revw_pg_mismatch(<vscale x 2 x i64>
ret <vscale x 2 x i64> %2
}
+; negative test
define <vscale x 2 x i64> @test_aarch64_sve_revw_b_mismatch(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b, <vscale x 2 x i64> %b1) {
; CHECK-LABEL: test_aarch64_sve_revw_b_mismatch:
; CHECK: // %bb.0:
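
For context: the new foldRevInvolution combine drops the redundant second reverse when the inner and outer intrinsic IDs match (and, for the predicated revb/revd/revh/revw forms, when the governing predicate and source operand also match), which is what turns the precommitted CHECK lines above into a bare ret. A minimal C sketch of the effect for one of the predicate forms (illustrative only, not part of the patch; assumes arm_sve.h, and the function name is made up):

  #include <arm_sve.h>

  /* Illustrative only: svrev_b16 still lowers to the AArch64-specific
     @llvm.aarch64.sve.rev.b16, so this back-to-back pair is removed by the
     new DAG combine (reversing twice is the identity), leaving just a ret,
     as in aarch64_sve_rev_b16_inv above. */
  svbool_t double_rev_b16(svbool_t pg) {
    return svrev_b16(svrev_b16(pg));
  }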