[llvm] dafd1f2 - [AArch64][SVE] Avoid using ptrue for unpredicated predicate AND.
Sander de Smalen via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 27 05:00:44 PST 2022
Author: Sander de Smalen
Date: 2022-01-27T13:00:23Z
New Revision: dafd1f29da27c2bb9ed95cf4f3149c68492e4b19
URL: https://github.com/llvm/llvm-project/commit/dafd1f29da27c2bb9ed95cf4f3149c68492e4b19
DIFF: https://github.com/llvm/llvm-project/commit/dafd1f29da27c2bb9ed95cf4f3149c68492e4b19.diff
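LOG: [AArch64][SVE] Avoid using ptrue for unpredicated predicate AND.
An unpredicated AND of two predicates can use its first operand as the governing predicate, since Op1 & Op1 & Op2 == Op1 & Op2, so no separate all-true predicate (ptrue) has to be materialised.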
Reviewed By: david-arm
Differential Revision: https://reviews.llvm.org/D118146
Added:
Modified:
llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
llvm/lib/Target/AArch64/SVEInstrFormats.td
llvm/test/CodeGen/AArch64/sve-int-log.ll
llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll
llvm/test/CodeGen/AArch64/sve-split-int-pred-reduce.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 3e7c46e6e43a6..73a680465f6f6 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -734,7 +734,7 @@ let Predicates = [HasSVEorStreamingSVE] in {
defm PFIRST : sve_int_pfirst<0b00000, "pfirst", int_aarch64_sve_pfirst>;
defm PNEXT : sve_int_pnext<0b00110, "pnext", int_aarch64_sve_pnext>;
- defm AND_PPzPP : sve_int_pred_log<0b0000, "and", int_aarch64_sve_and_z, and>;
+ defm AND_PPzPP : sve_int_pred_log_and<0b0000, "and", int_aarch64_sve_and_z>;
defm BIC_PPzPP : sve_int_pred_log<0b0001, "bic", int_aarch64_sve_bic_z>;
defm EOR_PPzPP : sve_int_pred_log<0b0010, "eor", int_aarch64_sve_eor_z, xor>;
defm SEL_PPPP : sve_int_pred_log<0b0011, "sel", vselect>;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index eb965261e1832..574b22124957b 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -1633,6 +1633,18 @@ multiclass sve_int_pred_log<bits<4> opc, string asm, SDPatternOperator op,
!cast<Instruction>(NAME), PTRUE_D>;
}
+multiclass sve_int_pred_log_and<bits<4> opc, string asm, SDPatternOperator op> :
+ sve_int_pred_log<opc, asm, op> {
+ def : Pat<(nxv16i1 (and nxv16i1:$Op1, nxv16i1:$Op2)),
+ (!cast<Instruction>(NAME) $Op1, $Op1, $Op2)>;
+ def : Pat<(nxv8i1 (and nxv8i1:$Op1, nxv8i1:$Op2)),
+ (!cast<Instruction>(NAME) $Op1, $Op1, $Op2)>;
+ def : Pat<(nxv4i1 (and nxv4i1:$Op1, nxv4i1:$Op2)),
+ (!cast<Instruction>(NAME) $Op1, $Op1, $Op2)>;
+ def : Pat<(nxv2i1 (and nxv2i1:$Op1, nxv2i1:$Op2)),
+ (!cast<Instruction>(NAME) $Op1, $Op1, $Op2)>;
+}
+
//===----------------------------------------------------------------------===//
// SVE Logical Mask Immediate Group
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AArch64/sve-int-log.ll b/llvm/test/CodeGen/AArch64/sve-int-log.ll
index 2da05d30a9e02..e8bdf67cba8b4 100644
--- a/llvm/test/CodeGen/AArch64/sve-int-log.ll
+++ b/llvm/test/CodeGen/AArch64/sve-int-log.ll
@@ -49,8 +49,7 @@ define <vscale x 16 x i8> @and_b_zero(<vscale x 16 x i8> %a) {
define <vscale x 2 x i1> @and_pred_d(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
; CHECK-LABEL: and_pred_d:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p2.d
-; CHECK-NEXT: and p0.b, p2/z, p0.b, p1.b
+; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT: ret
%res = and <vscale x 2 x i1> %a, %b
ret <vscale x 2 x i1> %res
@@ -59,8 +58,7 @@ define <vscale x 2 x i1> @and_pred_d(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b)
define <vscale x 4 x i1> @and_pred_s(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
; CHECK-LABEL: and_pred_s:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p2.s
-; CHECK-NEXT: and p0.b, p2/z, p0.b, p1.b
+; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT: ret
%res = and <vscale x 4 x i1> %a, %b
ret <vscale x 4 x i1> %res
@@ -69,8 +67,7 @@ define <vscale x 4 x i1> @and_pred_s(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b)
define <vscale x 8 x i1> @and_pred_h(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
; CHECK-LABEL: and_pred_h:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p2.h
-; CHECK-NEXT: and p0.b, p2/z, p0.b, p1.b
+; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT: ret
%res = and <vscale x 8 x i1> %a, %b
ret <vscale x 8 x i1> %res
@@ -79,8 +76,7 @@ define <vscale x 8 x i1> @and_pred_h(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b)
define <vscale x 16 x i1> @and_pred_b(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: and_pred_b:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p2.b
-; CHECK-NEXT: and p0.b, p2/z, p0.b, p1.b
+; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT: ret
%res = and <vscale x 16 x i1> %a, %b
ret <vscale x 16 x i1> %res
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll
index dac524ad7f29a..a018c56c8e860 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll
@@ -17,8 +17,7 @@ define <vscale x 16 x i1> @reinterpret_bool_from_h(<vscale x 8 x i1> %pg) {
; CHECK-LABEL: reinterpret_bool_from_h:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.h
-; CHECK-NEXT: ptrue p2.b
-; CHECK-NEXT: and p0.b, p2/z, p0.b, p1.b
+; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT: ret
%out = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg)
ret <vscale x 16 x i1> %out
@@ -28,8 +27,7 @@ define <vscale x 16 x i1> @reinterpret_bool_from_s(<vscale x 4 x i1> %pg) {
; CHECK-LABEL: reinterpret_bool_from_s:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: ptrue p2.b
-; CHECK-NEXT: and p0.b, p2/z, p0.b, p1.b
+; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT: ret
%out = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %pg)
ret <vscale x 16 x i1> %out
@@ -39,8 +37,7 @@ define <vscale x 16 x i1> @reinterpret_bool_from_d(<vscale x 2 x i1> %pg) {
; CHECK-LABEL: reinterpret_bool_from_d:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.d
-; CHECK-NEXT: ptrue p2.b
-; CHECK-NEXT: and p0.b, p2/z, p0.b, p1.b
+; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT: ret
%out = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg)
ret <vscale x 16 x i1> %out
diff --git a/llvm/test/CodeGen/AArch64/sve-split-int-pred-reduce.ll b/llvm/test/CodeGen/AArch64/sve-split-int-pred-reduce.ll
index 2afcaf7cedd80..d24c540a18024 100644
--- a/llvm/test/CodeGen/AArch64/sve-split-int-pred-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-split-int-pred-reduce.ll
@@ -7,7 +7,7 @@ define i1 @andv_nxv32i1(<vscale x 32 x i1> %a) {
; CHECK-LABEL: andv_nxv32i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p2.b
-; CHECK-NEXT: and p0.b, p2/z, p0.b, p1.b
+; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT: not p0.b, p2/z, p0.b
; CHECK-NEXT: ptest p2, p0.b
; CHECK-NEXT: cset w0, eq
@@ -24,10 +24,10 @@ define i1 @andv_nxv64i1(<vscale x 64 x i1> %a) {
; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: and p1.b, p1/z, p1.b, p3.b
+; CHECK-NEXT: and p0.b, p0/z, p0.b, p2.b
; CHECK-NEXT: ptrue p4.b
-; CHECK-NEXT: and p1.b, p4/z, p1.b, p3.b
-; CHECK-NEXT: and p0.b, p4/z, p0.b, p2.b
-; CHECK-NEXT: and p0.b, p4/z, p0.b, p1.b
+; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT: not p0.b, p4/z, p0.b
; CHECK-NEXT: ptest p4, p0.b
; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
@@ -73,7 +73,7 @@ define i1 @smaxv_nxv32i1(<vscale x 32 x i1> %a) {
; CHECK-LABEL: smaxv_nxv32i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p2.b
-; CHECK-NEXT: and p0.b, p2/z, p0.b, p1.b
+; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT: not p0.b, p2/z, p0.b
; CHECK-NEXT: ptest p2, p0.b
; CHECK-NEXT: cset w0, eq
@@ -116,7 +116,7 @@ define i1 @uminv_nxv32i1(<vscale x 32 x i1> %a) {
; CHECK-LABEL: uminv_nxv32i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p2.b
-; CHECK-NEXT: and p0.b, p2/z, p0.b, p1.b
+; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT: not p0.b, p2/z, p0.b
; CHECK-NEXT: ptest p2, p0.b
; CHECK-NEXT: cset w0, eq
More information about the llvm-commits mailing list.