[llvm] 370ff43 - [AArch64][SVE] Consider more intrinsics in 'isZeroingInactiveLanes'.
Sander de Smalen via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 26 07:08:59 PDT 2022
Author: Sander de Smalen
Date: 2022-07-26T15:07:41+01:00
New Revision: 370ff43a15c90eca61dfa5715c7da82f1a4709f8
URL: https://github.com/llvm/llvm-project/commit/370ff43a15c90eca61dfa5715c7da82f1a4709f8
DIFF: https://github.com/llvm/llvm-project/commit/370ff43a15c90eca61dfa5715c7da82f1a4709f8.diff
LOG: [AArch64][SVE] Consider more intrinsics in 'isZeroingInactiveLanes'.
This fixes some PTEST regressions introduced by D129282.
Reviewed By: paulwalker-arm
Differential Revision: https://reviews.llvm.org/D129851
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.ll
llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpge.ll
llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpgt.ll
llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphi.ll
llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphs.ll
llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpne.ll
llvm/test/CodeGen/AArch64/sve-ptest.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index bc57afdaa341c..7df43c3f4ff54 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -255,6 +255,12 @@ static bool isZeroingInactiveLanes(SDValue Op) {
return false;
case Intrinsic::aarch64_sve_ptrue:
case Intrinsic::aarch64_sve_pnext:
+ case Intrinsic::aarch64_sve_cmpeq:
+ case Intrinsic::aarch64_sve_cmpne:
+ case Intrinsic::aarch64_sve_cmpge:
+ case Intrinsic::aarch64_sve_cmpgt:
+ case Intrinsic::aarch64_sve_cmphs:
+ case Intrinsic::aarch64_sve_cmphi:
case Intrinsic::aarch64_sve_cmpeq_wide:
case Intrinsic::aarch64_sve_cmpne_wide:
case Intrinsic::aarch64_sve_cmpge_wide:
@@ -265,6 +271,11 @@ static bool isZeroingInactiveLanes(SDValue Op) {
case Intrinsic::aarch64_sve_cmphi_wide:
case Intrinsic::aarch64_sve_cmplo_wide:
case Intrinsic::aarch64_sve_cmpls_wide:
+ case Intrinsic::aarch64_sve_fcmpeq:
+ case Intrinsic::aarch64_sve_fcmpne:
+ case Intrinsic::aarch64_sve_fcmpge:
+ case Intrinsic::aarch64_sve_fcmpgt:
+ case Intrinsic::aarch64_sve_fcmpuo:
return true;
}
}
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.ll
index 48c1255e239c0..d0ea1ddb252bb 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.ll
@@ -20,10 +20,7 @@ define i32 @cmpeq_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale
define i32 @cmpeq_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmpeq_nxv4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: and p1.b, p0/z, p0.b, p1.b
; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpeq.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpge.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpge.ll
index 77ee75efed24b..5dae689b82a72 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpge.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpge.ll
@@ -20,10 +20,7 @@ define i32 @cmpge_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale
define i32 @cmpge_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmpge_nxv4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: and p1.b, p0/z, p0.b, p1.b
; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpgt.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpgt.ll
index f8ca4906e304b..c2dc452ad88f6 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpgt.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpgt.ll
@@ -20,10 +20,7 @@ define i32 @cmpgt_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale
define i32 @cmpgt_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmpgt_nxv4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: and p1.b, p0/z, p0.b, p1.b
; CHECK-NEXT: cmpgt p0.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpgt.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphi.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphi.ll
index ff9f62784fdde..e4b45921ece68 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphi.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphi.ll
@@ -20,10 +20,7 @@ define i32 @cmphi_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale
define i32 @cmphi_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmphi_nxv4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: and p1.b, p0/z, p0.b, p1.b
; CHECK-NEXT: cmphi p0.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmphi.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphs.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphs.ll
index 7c30aff17201a..42906f6e9703d 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphs.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphs.ll
@@ -20,10 +20,7 @@ define i32 @cmphs_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale
define i32 @cmphs_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmphs_nxv4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: and p1.b, p0/z, p0.b, p1.b
; CHECK-NEXT: cmphs p0.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmphs.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpne.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpne.ll
index a95a9a09b4b2b..ba4bd4b497d2c 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpne.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpne.ll
@@ -20,10 +20,7 @@ define i32 @cmpne_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale
define i32 @cmpne_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmpne_nxv4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: and p1.b, p0/z, p0.b, p1.b
; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest.ll b/llvm/test/CodeGen/AArch64/sve-ptest.ll
index 4c4d8b2ba8f42..ec5148886b39a 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest.ll
@@ -7,10 +7,8 @@
define i32 @fcmpeq_nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: fcmpeq_nxv4f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: and p1.b, p0/z, p0.b, p1.b
-; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: ptest p1, p0.b
+; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: ptest p0, p1.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.fcmpeq.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
@@ -22,10 +20,8 @@ define i32 @fcmpeq_nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vsca
define i32 @fcmpne_nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: fcmpne_nxv4f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: and p1.b, p0/z, p0.b, p1.b
-; CHECK-NEXT: fcmne p0.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: ptest p1, p0.b
+; CHECK-NEXT: fcmne p1.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: ptest p0, p1.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.fcmpne.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
@@ -37,10 +33,8 @@ define i32 @fcmpne_nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vsca
define i32 @fcmpge_nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: fcmpge_nxv4f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: and p1.b, p0/z, p0.b, p1.b
-; CHECK-NEXT: fcmge p0.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: ptest p1, p0.b
+; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: ptest p0, p1.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.fcmpge.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
@@ -52,10 +46,8 @@ define i32 @fcmpge_nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vsca
define i32 @fcmpgt_nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: fcmpgt_nxv4f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: and p1.b, p0/z, p0.b, p1.b
-; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: ptest p1, p0.b
+; CHECK-NEXT: fcmgt p1.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: ptest p0, p1.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.fcmpgt.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
@@ -67,10 +59,8 @@ define i32 @fcmpgt_nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vsca
define i32 @fcmpuo_nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: fcmpuo_nxv4f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: and p1.b, p0/z, p0.b, p1.b
-; CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: ptest p1, p0.b
+; CHECK-NEXT: fcmuo p1.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: ptest p0, p1.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.fcmpuo.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
More information about the llvm-commits mailing list