[llvm] [LLVM][CodeGen][AArch64] Remove bogus lowering of sve_while{gt/ge/hi/hs} intrinsics. (PR #88126)
Paul Walker via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 12 03:44:29 PDT 2024
https://github.com/paulwalker-arm updated https://github.com/llvm/llvm-project/pull/88126
>From ec99439bf75fc9464c60890bc75db9d9b2ea8bc3 Mon Sep 17 00:00:00 2001
From: Paul Walker <paul.walker at arm.com>
Date: Fri, 5 Apr 2024 16:30:31 +0100
Subject: [PATCH 1/3] [LLVM][CodeGen][AArch64] Remove bogus lowering of
sve_while{gt/ge/hi/hs} intrinsics.
When fed constant operands, we try to lower WHILE intrinsics to
PTRUE using a fixed length pattern. This is not valid for the
decrementing variants of WHILE because they construct their result
predicate vector by traversing from high->low lanes, whereas the
incrementing variants and PTRUE traverse from low->high lanes.
Whilst we can still utilise PTRUE by reversing its result, I figure
replacing a single WHILE with multiple SVE instructions is likely
counterproductive.
---
.../Target/AArch64/AArch64ISelLowering.cpp | 33 ++--------
.../CodeGen/AArch64/sve2-intrinsics-while.ll | 66 +++++++++++--------
2 files changed, 46 insertions(+), 53 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 819e8ccd5c33f0..dc46dfc82b8c65 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -5036,7 +5036,7 @@ static inline SDValue getPTrue(SelectionDAG &DAG, SDLoc DL, EVT VT,
}
static SDValue optimizeWhile(SDValue Op, SelectionDAG &DAG, bool IsSigned,
- bool IsLess, bool IsEqual) {
+ bool IsEqual) {
if (!isa<ConstantSDNode>(Op.getOperand(1)) ||
!isa<ConstantSDNode>(Op.getOperand(2)))
return SDValue();
@@ -5044,12 +5044,9 @@ static SDValue optimizeWhile(SDValue Op, SelectionDAG &DAG, bool IsSigned,
SDLoc dl(Op);
APInt X = Op.getConstantOperandAPInt(1);
APInt Y = Op.getConstantOperandAPInt(2);
- APInt NumActiveElems;
bool Overflow;
- if (IsLess)
- NumActiveElems = IsSigned ? Y.ssub_ov(X, Overflow) : Y.usub_ov(X, Overflow);
- else
- NumActiveElems = IsSigned ? X.ssub_ov(Y, Overflow) : X.usub_ov(Y, Overflow);
+ APInt NumActiveElems =
+ IsSigned ? Y.ssub_ov(X, Overflow) : Y.usub_ov(X, Overflow);
if (Overflow)
return SDValue();
@@ -5396,29 +5393,13 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return SDValue();
}
case Intrinsic::aarch64_sve_whilelo:
- return optimizeWhile(Op, DAG, /*IsSigned=*/false, /*IsLess=*/true,
- /*IsEqual=*/false);
+ return optimizeWhile(Op, DAG, /*IsSigned=*/false, /*IsEqual=*/false);
case Intrinsic::aarch64_sve_whilelt:
- return optimizeWhile(Op, DAG, /*IsSigned=*/true, /*IsLess=*/true,
- /*IsEqual=*/false);
+ return optimizeWhile(Op, DAG, /*IsSigned=*/true, /*IsEqual=*/false);
case Intrinsic::aarch64_sve_whilels:
- return optimizeWhile(Op, DAG, /*IsSigned=*/false, /*IsLess=*/true,
- /*IsEqual=*/true);
+ return optimizeWhile(Op, DAG, /*IsSigned=*/false, /*IsEqual=*/true);
case Intrinsic::aarch64_sve_whilele:
- return optimizeWhile(Op, DAG, /*IsSigned=*/true, /*IsLess=*/true,
- /*IsEqual=*/true);
- case Intrinsic::aarch64_sve_whilege:
- return optimizeWhile(Op, DAG, /*IsSigned=*/true, /*IsLess=*/false,
- /*IsEqual=*/true);
- case Intrinsic::aarch64_sve_whilegt:
- return optimizeWhile(Op, DAG, /*IsSigned=*/true, /*IsLess=*/false,
- /*IsEqual=*/false);
- case Intrinsic::aarch64_sve_whilehs:
- return optimizeWhile(Op, DAG, /*IsSigned=*/false, /*IsLess=*/false,
- /*IsEqual=*/true);
- case Intrinsic::aarch64_sve_whilehi:
- return optimizeWhile(Op, DAG, /*IsSigned=*/false, /*IsLess=*/false,
- /*IsEqual=*/false);
+ return optimizeWhile(Op, DAG, /*IsSigned=*/true, /*IsEqual=*/true);
case Intrinsic::aarch64_sve_sunpkhi:
return DAG.getNode(AArch64ISD::SUNPKHI, dl, Op.getValueType(),
Op.getOperand(1));
diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-while.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-while.ll
index 9b82d79cfa9625..57795ea29c5dcd 100644
--- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-while.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-while.ll
@@ -81,7 +81,7 @@ define <vscale x 2 x i1> @whilege_d_xx(i64 %a, i64 %b) {
define <vscale x 2 x i1> @whilege_d_ii_dont_fold_to_ptrue_larger_than_minvec() {
; CHECK-LABEL: whilege_d_ii_dont_fold_to_ptrue_larger_than_minvec:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #3
+; CHECK-NEXT: mov w8, #3 // =0x3
; CHECK-NEXT: whilege p0.d, x8, xzr
; CHECK-NEXT: ret
%out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilege.nxv2i1.i64(i64 3, i64 0)
@@ -91,7 +91,9 @@ define <vscale x 2 x i1> @whilege_d_ii_dont_fold_to_ptrue_larger_than_minvec() {
define <vscale x 16 x i1> @whilege_b_ii() {
; CHECK-LABEL: whilege_b_ii:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.b, vl6
+; CHECK-NEXT: mov w8, #-2 // =0xfffffffe
+; CHECK-NEXT: mov w9, #3 // =0x3
+; CHECK-NEXT: whilege p0.b, w9, w8
; CHECK-NEXT: ret
entry:
%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i32(i32 3, i32 -2)
@@ -101,7 +103,7 @@ entry:
define <vscale x 16 x i1> @whilege_b_ii_dont_fold_to_ptrue_nonexistent_vl9() {
; CHECK-LABEL: whilege_b_ii_dont_fold_to_ptrue_nonexistent_vl9:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov w8, #9
+; CHECK-NEXT: mov w8, #9 // =0x9
; CHECK-NEXT: whilege p0.b, x8, xzr
; CHECK-NEXT: ret
entry:
@@ -112,7 +114,8 @@ entry:
define <vscale x 16 x i1> @whilege_b_ii_vl_maximum() vscale_range(16, 16) {
; CHECK-LABEL: whilege_b_ii_vl_maximum:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.b, vl256
+; CHECK-NEXT: mov w8, #255 // =0xff
+; CHECK-NEXT: whilege p0.b, x8, xzr
; CHECK-NEXT: ret
%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i64(i64 255, i64 0)
ret <vscale x 16 x i1> %out
@@ -121,8 +124,8 @@ define <vscale x 16 x i1> @whilege_b_ii_vl_maximum() vscale_range(16, 16) {
define <vscale x 16 x i1> @whilege_b_ii_dont_fold_to_ptrue_overflow() {
; CHECK-LABEL: whilege_b_ii_dont_fold_to_ptrue_overflow:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov w8, #2
-; CHECK-NEXT: mov w9, #2147483647
+; CHECK-NEXT: mov w8, #2 // =0x2
+; CHECK-NEXT: mov w9, #2147483647 // =0x7fffffff
; CHECK-NEXT: movk w8, #32768, lsl #16
; CHECK-NEXT: whilege p0.b, w9, w8
; CHECK-NEXT: ret
@@ -134,8 +137,8 @@ entry:
define <vscale x 16 x i1> @whilege_b_ii_dont_fold_to_ptrue_increment_overflow() {
; CHECK-LABEL: whilege_b_ii_dont_fold_to_ptrue_increment_overflow:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov w8, #2147483647
-; CHECK-NEXT: mov w9, #-2147483641
+; CHECK-NEXT: mov w8, #2147483647 // =0x7fffffff
+; CHECK-NEXT: mov w9, #-2147483641 // =0x80000007
; CHECK-NEXT: whilege p0.b, w9, w8
; CHECK-NEXT: ret
entry:
@@ -222,7 +225,7 @@ define <vscale x 2 x i1> @whilehs_d_xx(i64 %a, i64 %b) {
define <vscale x 2 x i1> @whilehs_d_ii_dont_fold_to_ptrue_larger_than_minvec() {
; CHECK-LABEL: whilehs_d_ii_dont_fold_to_ptrue_larger_than_minvec:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #3
+; CHECK-NEXT: mov w8, #3 // =0x3
; CHECK-NEXT: whilehs p0.d, x8, xzr
; CHECK-NEXT: ret
%out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilehs.nxv2i1.i64(i64 3, i64 0)
@@ -232,7 +235,9 @@ define <vscale x 2 x i1> @whilehs_d_ii_dont_fold_to_ptrue_larger_than_minvec() {
define <vscale x 16 x i1> @whilehs_b_ii() {
; CHECK-LABEL: whilehs_b_ii:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.b, vl7
+; CHECK-NEXT: mov w8, #2 // =0x2
+; CHECK-NEXT: mov w9, #8 // =0x8
+; CHECK-NEXT: whilehs p0.b, x9, x8
; CHECK-NEXT: ret
entry:
%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehs.nxv16i1.i64(i64 8, i64 2)
@@ -242,7 +247,7 @@ entry:
define <vscale x 16 x i1> @whilehs_b_ii_dont_fold_to_ptrue_nonexistent_vl9() {
; CHECK-LABEL: whilehs_b_ii_dont_fold_to_ptrue_nonexistent_vl9:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov w8, #9
+; CHECK-NEXT: mov w8, #9 // =0x9
; CHECK-NEXT: whilehs p0.b, x8, xzr
; CHECK-NEXT: ret
entry:
@@ -253,7 +258,8 @@ entry:
define <vscale x 16 x i1> @whilehs_b_ii_vl_maximum() vscale_range(16, 16) {
; CHECK-LABEL: whilehs_b_ii_vl_maximum:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.b, vl256
+; CHECK-NEXT: mov w8, #255 // =0xff
+; CHECK-NEXT: whilehs p0.b, x8, xzr
; CHECK-NEXT: ret
%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehs.nxv16i1.i64(i64 255, i64 0)
ret <vscale x 16 x i1> %out
@@ -262,8 +268,8 @@ define <vscale x 16 x i1> @whilehs_b_ii_vl_maximum() vscale_range(16, 16) {
define <vscale x 16 x i1> @whilehs_b_ii_dont_fold_to_ptrue_overflow() {
; CHECK-LABEL: whilehs_b_ii_dont_fold_to_ptrue_overflow:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov w8, #-1
-; CHECK-NEXT: mov w9, #6
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-NEXT: mov w9, #6 // =0x6
; CHECK-NEXT: whilehs p0.b, w9, w8
; CHECK-NEXT: ret
entry:
@@ -274,7 +280,7 @@ entry:
define <vscale x 16 x i1> @whilehs_b_ii_dont_fold_to_ptrue_increment_overflow() {
; CHECK-LABEL: whilehs_b_ii_dont_fold_to_ptrue_increment_overflow:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov w8, #-1
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: whilehs p0.b, w8, wzr
; CHECK-NEXT: ret
entry:
@@ -361,7 +367,7 @@ define <vscale x 2 x i1> @whilegt_d_xx(i64 %a, i64 %b) {
define <vscale x 2 x i1> @whilegt_d_ii_dont_fold_to_ptrue_larger_than_minvec() {
; CHECK-LABEL: whilegt_d_ii_dont_fold_to_ptrue_larger_than_minvec:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #3
+; CHECK-NEXT: mov w8, #3 // =0x3
; CHECK-NEXT: whilegt p0.d, x8, xzr
; CHECK-NEXT: ret
%out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilegt.nxv2i1.i64(i64 3, i64 0)
@@ -371,7 +377,9 @@ define <vscale x 2 x i1> @whilegt_d_ii_dont_fold_to_ptrue_larger_than_minvec() {
define <vscale x 16 x i1> @whilegt_b_ii() {
; CHECK-LABEL: whilegt_b_ii:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.b, vl5
+; CHECK-NEXT: mov w8, #-2 // =0xfffffffe
+; CHECK-NEXT: mov w9, #3 // =0x3
+; CHECK-NEXT: whilegt p0.b, w9, w8
; CHECK-NEXT: ret
entry:
%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilegt.nxv16i1.i32(i32 3, i32 -2)
@@ -381,7 +389,7 @@ entry:
define <vscale x 16 x i1> @whilegt_b_ii_fold_to_ptrue_nonexistent_vl9() {
; CHECK-LABEL: whilegt_b_ii_fold_to_ptrue_nonexistent_vl9:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov w8, #9
+; CHECK-NEXT: mov w8, #9 // =0x9
; CHECK-NEXT: whilegt p0.b, x8, xzr
; CHECK-NEXT: ret
entry:
@@ -392,7 +400,8 @@ entry:
define <vscale x 16 x i1> @whilegt_b_ii_vl_maximum() vscale_range(16, 16) {
; CHECK-LABEL: whilegt_b_ii_vl_maximum:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.b, vl256
+; CHECK-NEXT: mov w8, #256 // =0x100
+; CHECK-NEXT: whilegt p0.b, x8, xzr
; CHECK-NEXT: ret
%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilegt.nxv16i1.i64(i64 256, i64 0)
ret <vscale x 16 x i1> %out
@@ -401,8 +410,8 @@ define <vscale x 16 x i1> @whilegt_b_ii_vl_maximum() vscale_range(16, 16) {
define <vscale x 16 x i1> @whilegt_b_ii_dont_fold_to_ptrue_overflow() {
; CHECK-LABEL: whilegt_b_ii_dont_fold_to_ptrue_overflow:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov w8, #2147483647
-; CHECK-NEXT: mov w9, #-2147483641
+; CHECK-NEXT: mov w8, #2147483647 // =0x7fffffff
+; CHECK-NEXT: mov w9, #-2147483641 // =0x80000007
; CHECK-NEXT: whilegt p0.b, w9, w8
; CHECK-NEXT: ret
entry:
@@ -489,7 +498,7 @@ define <vscale x 2 x i1> @whilehi_d_xx(i64 %a, i64 %b) {
define <vscale x 2 x i1> @whilehi_d_ii_dont_fold_to_ptrue_larger_than_minvec() {
; CHECK-LABEL: whilehi_d_ii_dont_fold_to_ptrue_larger_than_minvec:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #3
+; CHECK-NEXT: mov w8, #3 // =0x3
; CHECK-NEXT: whilehi p0.d, x8, xzr
; CHECK-NEXT: ret
%out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilehi.nxv2i1.i64(i64 3, i64 0)
@@ -499,7 +508,9 @@ define <vscale x 2 x i1> @whilehi_d_ii_dont_fold_to_ptrue_larger_than_minvec() {
define <vscale x 16 x i1> @whilehi_b_ii() {
; CHECK-LABEL: whilehi_b_ii:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.b, vl6
+; CHECK-NEXT: mov w8, #2 // =0x2
+; CHECK-NEXT: mov w9, #8 // =0x8
+; CHECK-NEXT: whilehi p0.b, x9, x8
; CHECK-NEXT: ret
entry:
%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehi.nxv16i1.i64(i64 8, i64 2)
@@ -509,7 +520,7 @@ entry:
define <vscale x 16 x i1> @whilehi_b_ii_dont_fold_to_ptrue_nonexistent_vl9() {
; CHECK-LABEL: whilehi_b_ii_dont_fold_to_ptrue_nonexistent_vl9:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov w8, #9
+; CHECK-NEXT: mov w8, #9 // =0x9
; CHECK-NEXT: whilehi p0.b, x8, xzr
; CHECK-NEXT: ret
entry:
@@ -520,7 +531,8 @@ entry:
define <vscale x 16 x i1> @whilehi_b_ii_vl_maximum() vscale_range(16, 16) {
; CHECK-LABEL: whilehi_b_ii_vl_maximum:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.b, vl256
+; CHECK-NEXT: mov w8, #256 // =0x100
+; CHECK-NEXT: whilehi p0.b, x8, xzr
; CHECK-NEXT: ret
%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehi.nxv16i1.i64(i64 256, i64 0)
ret <vscale x 16 x i1> %out
@@ -529,8 +541,8 @@ define <vscale x 16 x i1> @whilehi_b_ii_vl_maximum() vscale_range(16, 16) {
define <vscale x 16 x i1> @whilelhi_b_ii_dont_fold_to_ptrue_overflow() {
; CHECK-LABEL: whilelhi_b_ii_dont_fold_to_ptrue_overflow:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov w8, #-1
-; CHECK-NEXT: mov w9, #7
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-NEXT: mov w9, #7 // =0x7
; CHECK-NEXT: whilehi p0.b, w9, w8
; CHECK-NEXT: ret
entry:
>From fe6a4f6fb89e4197b59a90b82fc36db8da75d5eb Mon Sep 17 00:00:00 2001
From: Paul Walker <paul.walker at arm.com>
Date: Thu, 11 Apr 2024 11:36:21 +0000
Subject: [PATCH 2/3] Remove redundant negative tests.
I've kept just a single test to show we don't perform the ptrue
transformation.
---
.../CodeGen/AArch64/sve2-intrinsics-while.ll | 200 +-----------------
1 file changed, 4 insertions(+), 196 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-while.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-while.ll
index 57795ea29c5dcd..2052ebf4c8bb03 100644
--- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-while.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-while.ll
@@ -78,16 +78,7 @@ define <vscale x 2 x i1> @whilege_d_xx(i64 %a, i64 %b) {
ret <vscale x 2 x i1> %out
}
-define <vscale x 2 x i1> @whilege_d_ii_dont_fold_to_ptrue_larger_than_minvec() {
-; CHECK-LABEL: whilege_d_ii_dont_fold_to_ptrue_larger_than_minvec:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #3 // =0x3
-; CHECK-NEXT: whilege p0.d, x8, xzr
-; CHECK-NEXT: ret
- %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilege.nxv2i1.i64(i64 3, i64 0)
- ret <vscale x 2 x i1> %out
-}
-
+; Ensure we don't convert constant decrementing while instructions to ptrue.
define <vscale x 16 x i1> @whilege_b_ii() {
; CHECK-LABEL: whilege_b_ii:
; CHECK: // %bb.0: // %entry
@@ -100,52 +91,6 @@ entry:
ret <vscale x 16 x i1> %out
}
-define <vscale x 16 x i1> @whilege_b_ii_dont_fold_to_ptrue_nonexistent_vl9() {
-; CHECK-LABEL: whilege_b_ii_dont_fold_to_ptrue_nonexistent_vl9:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov w8, #9 // =0x9
-; CHECK-NEXT: whilege p0.b, x8, xzr
-; CHECK-NEXT: ret
-entry:
- %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i64(i64 9, i64 0)
- ret <vscale x 16 x i1> %out
-}
-
-define <vscale x 16 x i1> @whilege_b_ii_vl_maximum() vscale_range(16, 16) {
-; CHECK-LABEL: whilege_b_ii_vl_maximum:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #255 // =0xff
-; CHECK-NEXT: whilege p0.b, x8, xzr
-; CHECK-NEXT: ret
- %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i64(i64 255, i64 0)
- ret <vscale x 16 x i1> %out
-}
-
-define <vscale x 16 x i1> @whilege_b_ii_dont_fold_to_ptrue_overflow() {
-; CHECK-LABEL: whilege_b_ii_dont_fold_to_ptrue_overflow:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov w8, #2 // =0x2
-; CHECK-NEXT: mov w9, #2147483647 // =0x7fffffff
-; CHECK-NEXT: movk w8, #32768, lsl #16
-; CHECK-NEXT: whilege p0.b, w9, w8
-; CHECK-NEXT: ret
-entry:
- %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i32(i32 2147483647, i32 -2147483646)
- ret <vscale x 16 x i1> %out
-}
-
-define <vscale x 16 x i1> @whilege_b_ii_dont_fold_to_ptrue_increment_overflow() {
-; CHECK-LABEL: whilege_b_ii_dont_fold_to_ptrue_increment_overflow:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov w8, #2147483647 // =0x7fffffff
-; CHECK-NEXT: mov w9, #-2147483641 // =0x80000007
-; CHECK-NEXT: whilege p0.b, w9, w8
-; CHECK-NEXT: ret
-entry:
- %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i32(i32 -2147483641, i32 2147483647)
- ret <vscale x 16 x i1> %out
-}
-
;
; WHILEHS
;
@@ -222,16 +167,7 @@ define <vscale x 2 x i1> @whilehs_d_xx(i64 %a, i64 %b) {
ret <vscale x 2 x i1> %out
}
-define <vscale x 2 x i1> @whilehs_d_ii_dont_fold_to_ptrue_larger_than_minvec() {
-; CHECK-LABEL: whilehs_d_ii_dont_fold_to_ptrue_larger_than_minvec:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #3 // =0x3
-; CHECK-NEXT: whilehs p0.d, x8, xzr
-; CHECK-NEXT: ret
- %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilehs.nxv2i1.i64(i64 3, i64 0)
- ret <vscale x 2 x i1> %out
-}
-
+; Ensure we don't convert constant decrementing while instructions to ptrue.
define <vscale x 16 x i1> @whilehs_b_ii() {
; CHECK-LABEL: whilehs_b_ii:
; CHECK: // %bb.0: // %entry
@@ -244,50 +180,6 @@ entry:
ret <vscale x 16 x i1> %out
}
-define <vscale x 16 x i1> @whilehs_b_ii_dont_fold_to_ptrue_nonexistent_vl9() {
-; CHECK-LABEL: whilehs_b_ii_dont_fold_to_ptrue_nonexistent_vl9:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov w8, #9 // =0x9
-; CHECK-NEXT: whilehs p0.b, x8, xzr
-; CHECK-NEXT: ret
-entry:
- %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehs.nxv16i1.i64(i64 9, i64 0)
- ret <vscale x 16 x i1> %out
-}
-
-define <vscale x 16 x i1> @whilehs_b_ii_vl_maximum() vscale_range(16, 16) {
-; CHECK-LABEL: whilehs_b_ii_vl_maximum:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #255 // =0xff
-; CHECK-NEXT: whilehs p0.b, x8, xzr
-; CHECK-NEXT: ret
- %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehs.nxv16i1.i64(i64 255, i64 0)
- ret <vscale x 16 x i1> %out
-}
-
-define <vscale x 16 x i1> @whilehs_b_ii_dont_fold_to_ptrue_overflow() {
-; CHECK-LABEL: whilehs_b_ii_dont_fold_to_ptrue_overflow:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: mov w9, #6 // =0x6
-; CHECK-NEXT: whilehs p0.b, w9, w8
-; CHECK-NEXT: ret
-entry:
- %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehs.nxv16i1.i32(i32 6, i32 4294967295)
- ret <vscale x 16 x i1> %out
-}
-
-define <vscale x 16 x i1> @whilehs_b_ii_dont_fold_to_ptrue_increment_overflow() {
-; CHECK-LABEL: whilehs_b_ii_dont_fold_to_ptrue_increment_overflow:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: whilehs p0.b, w8, wzr
-; CHECK-NEXT: ret
-entry:
- %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehs.nxv16i1.i32(i32 4294967295, i32 0)
- ret <vscale x 16 x i1> %out
-}
-
;
; WHILEGT
;
@@ -364,16 +256,7 @@ define <vscale x 2 x i1> @whilegt_d_xx(i64 %a, i64 %b) {
ret <vscale x 2 x i1> %out
}
-define <vscale x 2 x i1> @whilegt_d_ii_dont_fold_to_ptrue_larger_than_minvec() {
-; CHECK-LABEL: whilegt_d_ii_dont_fold_to_ptrue_larger_than_minvec:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #3 // =0x3
-; CHECK-NEXT: whilegt p0.d, x8, xzr
-; CHECK-NEXT: ret
- %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilegt.nxv2i1.i64(i64 3, i64 0)
- ret <vscale x 2 x i1> %out
-}
-
+; Ensure we don't convert constant decrementing while instructions to ptrue.
define <vscale x 16 x i1> @whilegt_b_ii() {
; CHECK-LABEL: whilegt_b_ii:
; CHECK: // %bb.0: // %entry
@@ -386,39 +269,6 @@ entry:
ret <vscale x 16 x i1> %out
}
-define <vscale x 16 x i1> @whilegt_b_ii_fold_to_ptrue_nonexistent_vl9() {
-; CHECK-LABEL: whilegt_b_ii_fold_to_ptrue_nonexistent_vl9:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov w8, #9 // =0x9
-; CHECK-NEXT: whilegt p0.b, x8, xzr
-; CHECK-NEXT: ret
-entry:
- %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilegt.nxv16i1.i64(i64 9, i64 0)
- ret <vscale x 16 x i1> %out
-}
-
-define <vscale x 16 x i1> @whilegt_b_ii_vl_maximum() vscale_range(16, 16) {
-; CHECK-LABEL: whilegt_b_ii_vl_maximum:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #256 // =0x100
-; CHECK-NEXT: whilegt p0.b, x8, xzr
-; CHECK-NEXT: ret
- %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilegt.nxv16i1.i64(i64 256, i64 0)
- ret <vscale x 16 x i1> %out
-}
-
-define <vscale x 16 x i1> @whilegt_b_ii_dont_fold_to_ptrue_overflow() {
-; CHECK-LABEL: whilegt_b_ii_dont_fold_to_ptrue_overflow:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov w8, #2147483647 // =0x7fffffff
-; CHECK-NEXT: mov w9, #-2147483641 // =0x80000007
-; CHECK-NEXT: whilegt p0.b, w9, w8
-; CHECK-NEXT: ret
-entry:
- %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilegt.nxv16i1.i32(i32 -2147483641, i32 2147483647)
- ret <vscale x 16 x i1> %out
-}
-
;
; WHILEHI
;
@@ -495,16 +345,7 @@ define <vscale x 2 x i1> @whilehi_d_xx(i64 %a, i64 %b) {
ret <vscale x 2 x i1> %out
}
-define <vscale x 2 x i1> @whilehi_d_ii_dont_fold_to_ptrue_larger_than_minvec() {
-; CHECK-LABEL: whilehi_d_ii_dont_fold_to_ptrue_larger_than_minvec:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #3 // =0x3
-; CHECK-NEXT: whilehi p0.d, x8, xzr
-; CHECK-NEXT: ret
- %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilehi.nxv2i1.i64(i64 3, i64 0)
- ret <vscale x 2 x i1> %out
-}
-
+; Ensure we don't convert constant decrementing while instructions to ptrue.
define <vscale x 16 x i1> @whilehi_b_ii() {
; CHECK-LABEL: whilehi_b_ii:
; CHECK: // %bb.0: // %entry
@@ -517,39 +358,6 @@ entry:
ret <vscale x 16 x i1> %out
}
-define <vscale x 16 x i1> @whilehi_b_ii_dont_fold_to_ptrue_nonexistent_vl9() {
-; CHECK-LABEL: whilehi_b_ii_dont_fold_to_ptrue_nonexistent_vl9:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov w8, #9 // =0x9
-; CHECK-NEXT: whilehi p0.b, x8, xzr
-; CHECK-NEXT: ret
-entry:
- %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehi.nxv16i1.i64(i64 9, i64 0)
- ret <vscale x 16 x i1> %out
-}
-
-define <vscale x 16 x i1> @whilehi_b_ii_vl_maximum() vscale_range(16, 16) {
-; CHECK-LABEL: whilehi_b_ii_vl_maximum:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #256 // =0x100
-; CHECK-NEXT: whilehi p0.b, x8, xzr
-; CHECK-NEXT: ret
- %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehi.nxv16i1.i64(i64 256, i64 0)
- ret <vscale x 16 x i1> %out
-}
-
-define <vscale x 16 x i1> @whilelhi_b_ii_dont_fold_to_ptrue_overflow() {
-; CHECK-LABEL: whilelhi_b_ii_dont_fold_to_ptrue_overflow:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: mov w9, #7 // =0x7
-; CHECK-NEXT: whilehi p0.b, w9, w8
-; CHECK-NEXT: ret
-entry:
- %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehi.nxv16i1.i32(i32 7, i32 4294967295)
- ret <vscale x 16 x i1> %out
-}
-
declare <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i32(i32, i32)
declare <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i64(i64, i64)
declare <vscale x 8 x i1> @llvm.aarch64.sve.whilege.nxv8i1.i32(i32, i32)
>From e1d4a933886991d26679559be3ac4479d410b5a5 Mon Sep 17 00:00:00 2001
From: Paul Walker <paul.walker at arm.com>
Date: Fri, 12 Apr 2024 10:36:47 +0000
Subject: [PATCH 3/3] Rename optimizeWhile as optimizeIncrementingWhile to
better reflect its intended use.
---
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 16 ++++++++++------
1 file changed, 10 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index dc46dfc82b8c65..cb58fe2d7dda69 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -5035,8 +5035,8 @@ static inline SDValue getPTrue(SelectionDAG &DAG, SDLoc DL, EVT VT,
DAG.getTargetConstant(Pattern, DL, MVT::i32));
}
-static SDValue optimizeWhile(SDValue Op, SelectionDAG &DAG, bool IsSigned,
- bool IsEqual) {
+static SDValue optimizeIncrementingWhile(SDValue Op, SelectionDAG &DAG,
+ bool IsSigned, bool IsEqual) {
if (!isa<ConstantSDNode>(Op.getOperand(1)) ||
!isa<ConstantSDNode>(Op.getOperand(2)))
return SDValue();
@@ -5393,13 +5393,17 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return SDValue();
}
case Intrinsic::aarch64_sve_whilelo:
- return optimizeWhile(Op, DAG, /*IsSigned=*/false, /*IsEqual=*/false);
+ return optimizeIncrementingWhile(Op, DAG, /*IsSigned=*/false,
+ /*IsEqual=*/false);
case Intrinsic::aarch64_sve_whilelt:
- return optimizeWhile(Op, DAG, /*IsSigned=*/true, /*IsEqual=*/false);
+ return optimizeIncrementingWhile(Op, DAG, /*IsSigned=*/true,
+ /*IsEqual=*/false);
case Intrinsic::aarch64_sve_whilels:
- return optimizeWhile(Op, DAG, /*IsSigned=*/false, /*IsEqual=*/true);
+ return optimizeIncrementingWhile(Op, DAG, /*IsSigned=*/false,
+ /*IsEqual=*/true);
case Intrinsic::aarch64_sve_whilele:
- return optimizeWhile(Op, DAG, /*IsSigned=*/true, /*IsEqual=*/true);
+ return optimizeIncrementingWhile(Op, DAG, /*IsSigned=*/true,
+ /*IsEqual=*/true);
case Intrinsic::aarch64_sve_sunpkhi:
return DAG.getNode(AArch64ISD::SUNPKHI, dl, Op.getValueType(),
Op.getOperand(1));
More information about the llvm-commits
mailing list