[llvm] [AArch64][SVE] Add intrinsincs to assembly mapping for svpmov (PR #81861)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 15 06:01:58 PST 2024
https://github.com/Lukacma created https://github.com/llvm/llvm-project/pull/81861
This patch enables translation of svpmov intrinsic to the correct assembly instruction, instead of function call.
>From dce7deab924507588740ec43daa25821c3206a71 Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac at arm.com>
Date: Thu, 15 Feb 2024 11:29:52 +0000
Subject: [PATCH] [AArch64][SVE] Add mapping to assembly for svpmov intrinsics
---
llvm/include/llvm/IR/IntrinsicsAArch64.td | 22 +++--
.../AArch64/sve2p1-intrinsics-pmov-to-pred.ll | 82 +++----------------
2 files changed, 27 insertions(+), 77 deletions(-)
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 921e5b95ae03e8..d044bf6858376d 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -1367,6 +1367,17 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
llvm_i32_ty,
llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>;
+
+ class SVE2_1VectorArg_Pred_Intrinsic
+ : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
+ [llvm_anyvector_ty],
+ [IntrNoMem]>;
+
+ class SVE2_1VectorArgIndexed_Pred_Intrinsic
+ : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
+ [llvm_anyvector_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+
// NOTE: There is no relationship between these intrinsics beyond an attempt
// to reuse currently identical class definitions.
@@ -3610,15 +3621,10 @@ def int_aarch64_sve_extq : AdvSIMD_2VectorArgIndexed_Intrinsic;
//
// SVE2.1 - Move predicate to/from vector
//
-def int_aarch64_sve_pmov_to_pred_lane :
- DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
- [llvm_anyvector_ty, llvm_i32_ty],
- [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+def int_aarch64_sve_pmov_to_pred_lane : SVE2_1VectorArgIndexed_Pred_Intrinsic;
+
-def int_aarch64_sve_pmov_to_pred_lane_zero :
- DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
- [llvm_anyvector_ty],
- [IntrNoMem]>;
+def int_aarch64_sve_pmov_to_pred_lane_zero : SVE2_1VectorArg_Pred_Intrinsic;
def int_aarch64_sve_pmov_to_vector_lane_merging :
DefaultAttrsIntrinsic<[llvm_anyvector_ty],
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-pmov-to-pred.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-pmov-to-pred.ll
index 7cae1d2c216b61..a592dcd4b8ce99 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-pmov-to-pred.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-pmov-to-pred.ll
@@ -4,12 +4,7 @@
define <vscale x 16 x i1> @test_pmov_to_pred_i8(<vscale x 16 x i8> %zn) {
; CHECK-LABEL: test_pmov_to_pred_i8:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: mov w0, wzr
-; CHECK-NEXT: bl llvm.aarch64.sve.pmov.to.pred.lane.nxv16i8
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: pmov p0.b, z0
; CHECK-NEXT: ret
entry:
%res = call <vscale x 16 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv16i8(<vscale x 16 x i8> %zn, i32 0)
@@ -19,27 +14,10 @@ define <vscale x 16 x i1> @test_pmov_to_pred_i8(<vscale x 16 x i8> %zn) {
define <vscale x 8 x i1> @test_pmov_to_pred_i16(<vscale x 8 x i16> %zn) {
; CHECK-LABEL: test_pmov_to_pred_i16:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-NEXT: addvl sp, sp, #-2
-; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill
-; CHECK-NEXT: str z8, [sp, #1, mul vl] // 16-byte Folded Spill
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
-; CHECK-NEXT: .cfi_offset w30, -8
-; CHECK-NEXT: .cfi_offset w29, -16
-; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
-; CHECK-NEXT: mov w0, wzr
-; CHECK-NEXT: mov z8.d, z0.d
-; CHECK-NEXT: bl llvm.aarch64.sve.pmov.to.pred.lane.nxv8i16
-; CHECK-NEXT: mov z0.d, z8.d
-; CHECK-NEXT: mov w0, #1 // =0x1
-; CHECK-NEXT: mov p4.b, p0.b
-; CHECK-NEXT: bl llvm.aarch64.sve.pmov.to.pred.lane.nxv8i16
-; CHECK-NEXT: ptrue p1.h
-; CHECK-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload
-; CHECK-NEXT: eor p0.b, p1/z, p4.b, p0.b
-; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
-; CHECK-NEXT: addvl sp, sp, #2
-; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: pmov p1.h, z0[0]
+; CHECK-NEXT: pmov p2.h, z0[1]
+; CHECK-NEXT: eor p0.b, p0/z, p1.b, p2.b
; CHECK-NEXT: ret
entry:
%res1 = call <vscale x 8 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv8i16(<vscale x 8 x i16> %zn, i32 0)
@@ -52,27 +30,10 @@ define <vscale x 8 x i1> @test_pmov_to_pred_i16(<vscale x 8 x i16> %zn) {
define <vscale x 4 x i1> @test_pmov_to_pred_i32(<vscale x 4 x i32> %zn) {
; CHECK-LABEL: test_pmov_to_pred_i32:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-NEXT: addvl sp, sp, #-2
-; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill
-; CHECK-NEXT: str z8, [sp, #1, mul vl] // 16-byte Folded Spill
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
-; CHECK-NEXT: .cfi_offset w30, -8
-; CHECK-NEXT: .cfi_offset w29, -16
-; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
-; CHECK-NEXT: mov w0, wzr
-; CHECK-NEXT: mov z8.d, z0.d
-; CHECK-NEXT: bl llvm.aarch64.sve.pmov.to.pred.lane.nxv4i32
-; CHECK-NEXT: mov z0.d, z8.d
-; CHECK-NEXT: mov w0, #3 // =0x3
-; CHECK-NEXT: mov p4.b, p0.b
-; CHECK-NEXT: bl llvm.aarch64.sve.pmov.to.pred.lane.nxv4i32
-; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload
-; CHECK-NEXT: eor p0.b, p1/z, p4.b, p0.b
-; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
-; CHECK-NEXT: addvl sp, sp, #2
-; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: pmov p1.s, z0[0]
+; CHECK-NEXT: pmov p2.s, z0[3]
+; CHECK-NEXT: eor p0.b, p0/z, p1.b, p2.b
; CHECK-NEXT: ret
entry:
%res1 = call <vscale x 4 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv4i32(<vscale x 4 x i32> %zn, i32 0)
@@ -85,27 +46,10 @@ define <vscale x 4 x i1> @test_pmov_to_pred_i32(<vscale x 4 x i32> %zn) {
define <vscale x 2 x i1> @test_pmov_to_pred_i64(<vscale x 2 x i64> %zn) {
; CHECK-LABEL: test_pmov_to_pred_i64:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-NEXT: addvl sp, sp, #-2
-; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill
-; CHECK-NEXT: str z8, [sp, #1, mul vl] // 16-byte Folded Spill
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
-; CHECK-NEXT: .cfi_offset w30, -8
-; CHECK-NEXT: .cfi_offset w29, -16
-; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
-; CHECK-NEXT: mov w0, wzr
-; CHECK-NEXT: mov z8.d, z0.d
-; CHECK-NEXT: bl llvm.aarch64.sve.pmov.to.pred.lane.nxv2i64
-; CHECK-NEXT: mov z0.d, z8.d
-; CHECK-NEXT: mov w0, #7 // =0x7
-; CHECK-NEXT: mov p4.b, p0.b
-; CHECK-NEXT: bl llvm.aarch64.sve.pmov.to.pred.lane.nxv2i64
-; CHECK-NEXT: ptrue p1.d
-; CHECK-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload
-; CHECK-NEXT: eor p0.b, p1/z, p4.b, p0.b
-; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
-; CHECK-NEXT: addvl sp, sp, #2
-; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: pmov p1.d, z0[0]
+; CHECK-NEXT: pmov p2.d, z0[7]
+; CHECK-NEXT: eor p0.b, p0/z, p1.b, p2.b
; CHECK-NEXT: ret
entry:
%res1 = call <vscale x 2 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv2i64(<vscale x 2 x i64> %zn, i32 0)
More information about the llvm-commits
mailing list