[llvm] 3ddd5a8 - [AArch64][SVE] Add more ptest removal tests
Cullen Rhodes via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 15 02:54:28 PST 2022
Author: Cullen Rhodes
Date: 2022-11-15T10:32:28Z
New Revision: 3ddd5a848f0da3fd54bd6f4d42441020c5d60735
URL: https://github.com/llvm/llvm-project/commit/3ddd5a848f0da3fd54bd6f4d42441020c5d60735
DIFF: https://github.com/llvm/llvm-project/commit/3ddd5a848f0da3fd54bd6f4d42441020c5d60735.diff
LOG: [AArch64][SVE] Add more ptest removal tests
Precommit test containing examples where PTEST is incorrectly optimized
away. Later patches will fix incorrect optimizations.
Reviewed By: bsmith
Differential Revision: https://reviews.llvm.org/D137715
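As a quick recap of the pattern exercised below, here is a minimal sketch distilled from the tests in the diff (the function name is made up for illustration; the intrinsics are the same ones used in the test): an SVE compare performs an implicit PTEST on its governing predicate, so when the explicit PTEST uses that same predicate as its mask, the PTEST is redundant and the flag-setting compare plus CSET is enough.

  ; Hypothetical function name; pattern taken from cmp8_ptest_any_px below.
  define i1 @sketch_cmp_then_ptest_any(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
    ; The compare already sets NZCV, using %pg as the implicit PTEST mask.
    %cmp = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
    ; Same mask (%pg) as the compare, so this explicit PTEST is redundant for "any".
    %any = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %cmp)
    ret i1 %any
  }
  declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
  declare i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)

  ; Expected lowering once the fold applies (as checked in cmp8_ptest_any_px below):
  ;   cmpge p0.b, p0/z, z0.b, z1.b
  ;   cset  w0, ne
  ;   ret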
Added:
Modified:
llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll
index 880d94cf3f1bf..0ccd882a28086 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll
@@ -13,7 +13,7 @@ define i32 @cmple_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> %a)
%2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
- %3 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %2, <vscale x 16 x i1> %1)
+ %3 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %2, <vscale x 16 x i1> %1)
%conv = zext i1 %3 to i32
ret i32 %conv
}
@@ -29,7 +29,7 @@ define i32 @cmple_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <v
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmple.wide.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b)
- %2 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %1)
+ %2 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %1)
%conv = zext i1 %2 to i32
ret i32 %conv
}
@@ -43,7 +43,7 @@ define i32 @cmple_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <v
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
%2 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.cmple.wide.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b)
%3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %2)
- %4 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
+ %4 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
%conv = zext i1 %4 to i32
ret i32 %conv
}
@@ -57,19 +57,436 @@ define i32 @cmple_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <v
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
%2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmple.wide.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b)
%3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
- %4 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
+ %4 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
%conv = zext i1 %4 to i32
ret i32 %conv
}
+; ==============================================================================
+; PTEST_OP(PG, CMP(PG, ...))
+; ==============================================================================
+
+;
+; PTEST_FIRST(PG, CMP8(PG, A, B)). PTEST is redundant.
+;
+define i1 @cmp8_ptest_first_px(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: cmp8_ptest_first_px:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: cset w0, mi
+; CHECK-NEXT: ret
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+ %2 = tail call i1 @llvm.aarch64.sve.ptest.first.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %1)
+ ret i1 %2
+}
+
+;
+; PTEST_LAST(PG, CMP8(PG, A, B)). PTEST is redundant.
+;
+define i1 @cmp8_ptest_last_px(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: cmp8_ptest_last_px:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: ret
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+ %2 = tail call i1 @llvm.aarch64.sve.ptest.last.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %1)
+ ret i1 %2
+}
+
+;
+; PTEST_ANY(PG, CMP8(PG, A, B)). PTEST is redundant.
+;
+define i1 @cmp8_ptest_any_px(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: cmp8_ptest_any_px:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+ %2 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %1)
+ ret i1 %2
+}
+
+;
+; PTEST_FIRST(PG, CMP32(PG, A, B)). Can't remove PTEST since PTEST.B vs CMP.S.
+;
+define i1 @cmp32_ptest_first_px(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: cmp32_ptest_first_px:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: cset w0, mi
+; CHECK-NEXT: ret
+ %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+ %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+ %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
+ %4 = tail call i1 @llvm.aarch64.sve.ptest.first.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
+ ret i1 %4
+}
+
+;
+; PTEST_LAST(PG, CMP32(PG, A, B)). Can't remove PTEST since PTEST.B vs CMP.S.
+;
+define i1 @cmp32_ptest_last_px(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: cmp32_ptest_last_px:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: ret
+ %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+ %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+ %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
+ %4 = tail call i1 @llvm.aarch64.sve.ptest.last.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
+ ret i1 %4
+}
+
+;
+; PTEST_ANY(PG, CMP32(PG, A, B)). PTEST is redundant.
+;
+define i1 @cmp32_ptest_any_px(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: cmp32_ptest_any_px:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+ %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+ %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
+ %4 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
+ ret i1 %4
+}
+
+; ==============================================================================
+; PTEST_OP(X=CMP(PG, ...), X)
+; ==============================================================================
+
+;
+; PTEST_FIRST(X=CMP8(PG, A, B), X). PTEST is redundant if condition is changed
+; to any.
+;
+; Can't remove PTEST and keep the same condition (first), since the mask for
+; the implicit PTEST (PG) performed by the compare differs from the mask
+; specified to the explicit PTEST and could have a different result.
+;
+; For example, consider
+;
+; PG=<1, 1, x, x>
+;   Z0=<2, 1, x, x>
+;   Z1=<1, 2, x, x>
+;
+; X=CMPLE(PG, Z0, Z1)
+; =<0, 1, x, x> NZCV=0xxx
+; PTEST(X, X), NZCV=1xxx
+;
+; where the first active flag (bit 'N' in NZCV) is set by the explicit PTEST,
+; but not by the implicit PTEST as part of the compare. However, given the
+; PTEST mask and source are the same, first is equivalent to any. The same
+; applies to last active.
+;
+define i1 @cmp8_ptest_first_xx(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: cmp8_ptest_first_xx:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: cset w0, mi
+; CHECK-NEXT: ret
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+ %2 = tail call i1 @llvm.aarch64.sve.ptest.first.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %1)
+ ret i1 %2
+}
+
+;
+; PTEST_LAST(X=CMP8(PG, A, B), X). PTEST is redundant if condition is changed
+; to any.
+;
+define i1 @cmp8_ptest_last_xx(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: cmp8_ptest_last_xx:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: ret
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+ %2 = tail call i1 @llvm.aarch64.sve.ptest.last.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %1)
+ ret i1 %2
+}
+
+;
+; PTEST_ANY(X=CMP8(PG, A, B), X). PTEST is redundant.
+;
+define i1 @cmp8_ptest_any_xx(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: cmp8_ptest_any_xx:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+ %2 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %1)
+ ret i1 %2
+}
+
+;
+; PTEST_FIRST(X=CMP32(PG, A, B), X). PTEST is redundant if condition is changed
+; to any.
+;
+define i1 @cmp32_ptest_first_xx(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: cmp32_ptest_first_xx:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: cset w0, mi
+; CHECK-NEXT: ret
+ %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+ %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+ %3 = tail call i1 @llvm.aarch64.sve.ptest.first.nxv4i1(<vscale x 4 x i1> %2, <vscale x 4 x i1> %2)
+ ret i1 %3
+}
+
+;
+; PTEST_LAST(X=CMP32(PG, A, B), X). PTEST is redundant if condition is changed
+; to any.
+;
+define i1 @cmp32_ptest_last_xx(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: cmp32_ptest_last_xx:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: ret
+ %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+ %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+ %3 = tail call i1 @llvm.aarch64.sve.ptest.last.nxv4i1(<vscale x 4 x i1> %2, <vscale x 4 x i1> %2)
+ ret i1 %3
+}
+
+;
+; PTEST_ANY(X=CMP32(PG, A, B), X). PTEST is redundant.
+;
+define i1 @cmp32_ptest_any_xx(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: cmp32_ptest_any_xx:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+ %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+ %3 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv4i1(<vscale x 4 x i1> %2, <vscale x 4 x i1> %2)
+ ret i1 %3
+}
+
+; ==============================================================================
+; PTEST_OP(PTRUE_ALL, CMP(PG, ...))
+; ==============================================================================
+
+;
+; PTEST_FIRST(PTRUE_ALL, CMP8(PG, A, B)). Can't remove PTEST since mask is
+; different.
+;
+define i1 @cmp8_ptest_first_ax(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: cmp8_ptest_first_ax:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: cset w0, mi
+; CHECK-NEXT: ret
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+ %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+ %3 = tail call i1 @llvm.aarch64.sve.ptest.first.nxv16i1(<vscale x 16 x i1> %2, <vscale x 16 x i1> %1)
+ ret i1 %3
+}
+
+;
+; PTEST_LAST(PTRUE_ALL, CMP8(PG, A, B)). Can't remove PTEST since mask is
+; different.
+;
+define i1 @cmp8_ptest_last_ax(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: cmp8_ptest_last_ax:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: ret
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+ %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+ %3 = tail call i1 @llvm.aarch64.sve.ptest.last.nxv16i1(<vscale x 16 x i1> %2, <vscale x 16 x i1> %1)
+ ret i1 %3
+}
+
+;
+; PTEST_ANY(PTRUE_ALL, CMP8(PG, A, B)). PTEST is redundant.
+;
+define i1 @cmp8_ptest_any_ax(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: cmp8_ptest_any_ax:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+ %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+ %3 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %2, <vscale x 16 x i1> %1)
+ ret i1 %3
+}
+
+;
+; PTEST_FIRST(PTRUE_ALL, CMP32(PG, A, B)). Can't remove PTEST since mask is
+; different.
+;
+define i1 @cmp32_ptest_first_ax(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: cmp32_ptest_first_ax:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: cset w0, mi
+; CHECK-NEXT: ret
+ %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+ %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+ %3 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %4 = tail call i1 @llvm.aarch64.sve.ptest.first.nxv4i1(<vscale x 4 x i1> %3, <vscale x 4 x i1> %2)
+ ret i1 %4
+}
+
+;
+; PTEST_LAST(PTRUE_ALL, CMP32(PG, A, B)). Can't remove PTEST since mask is
+; different.
+;
+define i1 @cmp32_ptest_last_ax(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: cmp32_ptest_last_ax:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: ret
+ %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+ %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+ %3 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %4 = tail call i1 @llvm.aarch64.sve.ptest.last.nxv4i1(<vscale x 4 x i1> %3, <vscale x 4 x i1> %2)
+ ret i1 %4
+}
+
+;
+; PTEST_ANY(PTRUE_ALL, CMP32(PG, A, B)). PTEST is redundant.
+;
+define i1 @cmp32_ptest_any_ax(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: cmp32_ptest_any_ax:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+ %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+ %3 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %4 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv4i1(<vscale x 4 x i1> %3, <vscale x 4 x i1> %2)
+ ret i1 %4
+}
+
+; ==============================================================================
+; PTEST_OP(PTRUE_ALL, CMP(PTRUE_ALL, ...))
+; ==============================================================================
+
+;
+; PTEST_FIRST(PTRUE_ALL, CMP8(PTRUE_ALL, A, B)). PTEST is redundant.
+;
+define i1 @cmp8_ptest_first_aa(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: cmp8_ptest_first_aa:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: cset w0, mi
+; CHECK-NEXT: ret
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+ %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+ %3 = tail call i1 @llvm.aarch64.sve.ptest.first.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %2)
+ ret i1 %3
+}
+
+;
+; PTEST_LAST(PTRUE_ALL, CMP8(PTRUE_ALL, A, B)). PTEST is redundant.
+;
+define i1 @cmp8_ptest_last_aa(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: cmp8_ptest_last_aa:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: ret
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+ %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+ %3 = tail call i1 @llvm.aarch64.sve.ptest.last.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %2)
+ ret i1 %3
+}
+
+;
+; PTEST_ANY(PTRUE_ALL, CMP8(PTRUE_ALL, A, B)). PTEST is redundant.
+;
+define i1 @cmp8_ptest_any_aa(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: cmp8_ptest_any_aa:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+ %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+ %3 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %2)
+ ret i1 %3
+}
+
+;
+; PTEST_FIRST(PTRUE_ALL, CMP32(PTRUE_ALL, A, B)). PTEST is redundant.
+;
+define i1 @cmp32_ptest_first_aa(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: cmp32_ptest_first_aa:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: cset w0, mi
+; CHECK-NEXT: ret
+ %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+ %3 = tail call i1 @llvm.aarch64.sve.ptest.first.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %2)
+ ret i1 %3
+}
+
+;
+; PTEST_LAST(PTRUE_ALL, CMP32(PTRUE_ALL, A, B)). PTEST is redundant.
+;
+define i1 @cmp32_ptest_last_aa(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: cmp32_ptest_last_aa:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: ret
+ %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+ %3 = tail call i1 @llvm.aarch64.sve.ptest.last.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %2)
+ ret i1 %3
+}
+
+;
+; PTEST_ANY(PTRUE_ALL, CMP32(PTRUE_ALL, A, B)). PTEST is redundant.
+;
+define i1 @cmp32_ptest_any_aa(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: cmp32_ptest_any_aa:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+ %3 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %2)
+ ret i1 %3
+}
+
declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.cmple.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.cmple.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.cmple.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)
-declare i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1>, <vscale x 16 x i1>)
+declare i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
+declare i1 @llvm.aarch64.sve.ptest.first.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
+declare i1 @llvm.aarch64.sve.ptest.last.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
+
+declare i1 @llvm.aarch64.sve.ptest.any.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
+declare i1 @llvm.aarch64.sve.ptest.first.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
+declare i1 @llvm.aarch64.sve.ptest.last.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)