[llvm] [AArch64][CodeGen] Add patterns for small negative VScale const (PR #89607)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 23 20:16:53 PDT 2024
https://github.com/vfdff updated https://github.com/llvm/llvm-project/pull/89607
>From c1a935bbf4bacf0c8ff9f74d6c745b03665298a6 Mon Sep 17 00:00:00 2001
From: zhongyunde 00443407 <zhongyunde at huawei.com>
Date: Tue, 9 Apr 2024 05:25:16 -0400
Subject: [PATCH 1/2] [AArch64][CodeGen] Add patterns for small negative VScale
const
On AArch64, rdvl accepts a negative immediate, while cntd/cntw/cnth do not.
Because we support vscale with a negative multiplier, we do not restrict the
negative case and instead take the hit of the extra patterns.
Also add NoUseScalarIncVL so these patterns do not interfere with the patterns
selected under -mattr=+use-scalar-inc-vl.
Fixes https://github.com/llvm/llvm-project/issues/84620
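For illustration, here is a minimal IR sketch of the case this change targets
(the function name is hypothetical; the sve-vl-arith.ll tests below are the
authoritative examples). Adding vscale * -24, which cnth can express as
"all, mul #3", now selects cnth + sub on the NO_SCALAR_INC path instead of
cnth + neg + add:

  ; Sketch only: add a negative multiple of vscale (a decrement by vscale * 24).
  define i64 @dec_by_negative_vscale(i64 %a) {
    %vscale = call i64 @llvm.vscale.i64()
    %mul = mul i64 %vscale, -24       ; -24 = -(8 * 3), in range for cnth
    %res = add i64 %a, %mul
    ret i64 %res
  }
  declare i64 @llvm.vscale.i64()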
---
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 2 ++
.../lib/Target/AArch64/AArch64SVEInstrInfo.td | 21 ++++++++++++++++
...plex-deinterleaving-reductions-scalable.ll | 24 +++++++++----------
llvm/test/CodeGen/AArch64/sve-vl-arith.ll | 18 +++++---------
.../vscale-and-sve-cnt-demandedbits.ll | 10 ++++----
5 files changed, 46 insertions(+), 29 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index be53be782077b7..2c6bfdd6c97456 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -310,6 +310,8 @@ def UseNegativeImmediates
def UseScalarIncVL : Predicate<"Subtarget->useScalarIncVL()">;
+def NoUseScalarIncVL : Predicate<"!Subtarget->useScalarIncVL()">;
+
def UseSVEFPLD1R : Predicate<"!Subtarget->noSVEFPLD1R()">;
def IsNeonAvailable : Predicate<"Subtarget->isNeonAvailable()">;
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 6972acd985cb9a..d50182cc8f1e4d 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -2583,6 +2583,27 @@ let Predicates = [HasSVEorSME] in {
sub_32)>;
}
+ // Add NoUseScalarIncVL to avoid conflicting with the patterns guarded by UseScalarIncVL
+ let Predicates = [NoUseScalarIncVL] in {
+ def : Pat<(add GPR64:$op, (vscale (sve_rdvl_imm i32:$imm))),
+ (ADDXrs GPR64:$op, (RDVLI_XI $imm), 0)>;
+ def : Pat<(add GPR64:$op, (vscale (sve_cnth_imm_neg i32:$imm))),
+ (SUBXrs GPR64:$op, (CNTH_XPiI 31, $imm), 0)>;
+ def : Pat<(add GPR64:$op, (vscale (sve_cntw_imm_neg i32:$imm))),
+ (SUBXrs GPR64:$op, (CNTW_XPiI 31, $imm), 0)>;
+ def : Pat<(add GPR64:$op, (vscale (sve_cntd_imm_neg i32:$imm))),
+ (SUBXrs GPR64:$op, (CNTD_XPiI 31, $imm), 0)>;
+
+ def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_rdvl_imm i32:$imm))))),
+ (ADDSWrr GPR32:$op, (EXTRACT_SUBREG (RDVLI_XI $imm), sub_32))>;
+ def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_cnth_imm_neg i32:$imm))))),
+ (SUBSWrr GPR32:$op, (EXTRACT_SUBREG (CNTH_XPiI 31, $imm), sub_32))>;
+ def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_cntw_imm_neg i32:$imm))))),
+ (SUBSWrr GPR32:$op, (EXTRACT_SUBREG (CNTW_XPiI 31, $imm), sub_32))>;
+ def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_cntd_imm_neg i32:$imm))))),
+ (SUBSWrr GPR32:$op, (EXTRACT_SUBREG (CNTD_XPiI 31, $imm), sub_32))>;
+ }
+
// FIXME: BigEndian requires an additional REV instruction to satisfy the
// constraint that none of the bits change when stored to memory as one
// type, and reloaded as another type.
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll
index 664d99a3627b58..5bef95910d9060 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll
@@ -17,11 +17,11 @@ define %"class.std::complex" @complex_mul_v2f64(ptr %a, ptr %b) {
; CHECK-NEXT: mov z1.d, #0 // =0x0
; CHECK-NEXT: cntd x9
; CHECK-NEXT: ptrue p1.b
-; CHECK-NEXT: neg x9, x9
-; CHECK-NEXT: mov w10, #100 // =0x64
+; CHECK-NEXT: neg x10, x9
+; CHECK-NEXT: mov w11, #100 // =0x64
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: and x10, x9, x10
+; CHECK-NEXT: and x10, x10, x11
; CHECK-NEXT: rdvl x11, #2
; CHECK-NEXT: zip2 z0.d, z1.d, z1.d
; CHECK-NEXT: zip1 z1.d, z1.d, z1.d
@@ -33,7 +33,7 @@ define %"class.std::complex" @complex_mul_v2f64(ptr %a, ptr %b) {
; CHECK-NEXT: ld1d { z3.d }, p0/z, [x12, #1, mul vl]
; CHECK-NEXT: ld1b { z4.b }, p1/z, [x1, x8]
; CHECK-NEXT: ld1d { z5.d }, p0/z, [x13, #1, mul vl]
-; CHECK-NEXT: adds x10, x10, x9
+; CHECK-NEXT: subs x10, x10, x9
; CHECK-NEXT: add x8, x8, x11
; CHECK-NEXT: fcmla z1.d, p0/m, z4.d, z2.d, #0
; CHECK-NEXT: fcmla z0.d, p0/m, z5.d, z3.d, #0
@@ -106,12 +106,12 @@ define %"class.std::complex" @complex_mul_nonzero_init_v2f64(ptr %a, ptr %b) {
; CHECK-NEXT: cntd x9
; CHECK-NEXT: fmov d2, #2.00000000
; CHECK-NEXT: ptrue p0.d, vl1
-; CHECK-NEXT: neg x9, x9
+; CHECK-NEXT: neg x10, x9
; CHECK-NEXT: ptrue p1.b
-; CHECK-NEXT: mov w10, #100 // =0x64
+; CHECK-NEXT: mov w11, #100 // =0x64
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: sel z3.d, p0, z0.d, z1.d
-; CHECK-NEXT: and x10, x9, x10
+; CHECK-NEXT: and x10, x10, x11
; CHECK-NEXT: rdvl x11, #2
; CHECK-NEXT: mov z1.d, p0/m, z2.d
; CHECK-NEXT: ptrue p0.d
@@ -125,7 +125,7 @@ define %"class.std::complex" @complex_mul_nonzero_init_v2f64(ptr %a, ptr %b) {
; CHECK-NEXT: ld1d { z3.d }, p0/z, [x12, #1, mul vl]
; CHECK-NEXT: ld1b { z4.b }, p1/z, [x1, x8]
; CHECK-NEXT: ld1d { z5.d }, p0/z, [x13, #1, mul vl]
-; CHECK-NEXT: adds x10, x10, x9
+; CHECK-NEXT: subs x10, x10, x9
; CHECK-NEXT: add x8, x8, x11
; CHECK-NEXT: fcmla z1.d, p0/m, z4.d, z2.d, #0
; CHECK-NEXT: fcmla z0.d, p0/m, z5.d, z3.d, #0
@@ -191,13 +191,13 @@ define %"class.std::complex" @complex_mul_v2f64_unrolled(ptr %a, ptr %b) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z1.d, #0 // =0x0
; CHECK-NEXT: cntw x9
-; CHECK-NEXT: mov w10, #1000 // =0x3e8
-; CHECK-NEXT: neg x9, x9
+; CHECK-NEXT: mov w11, #1000 // =0x3e8
+; CHECK-NEXT: neg x10, x9
; CHECK-NEXT: rdvl x12, #2
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: and x10, x9, x10
+; CHECK-NEXT: and x10, x10, x11
; CHECK-NEXT: zip2 z0.d, z1.d, z1.d
; CHECK-NEXT: zip1 z1.d, z1.d, z1.d
; CHECK-NEXT: add x11, x1, x12
@@ -219,7 +219,7 @@ define %"class.std::complex" @complex_mul_v2f64_unrolled(ptr %a, ptr %b) {
; CHECK-NEXT: ld1d { z17.d }, p0/z, [x15, #1, mul vl]
; CHECK-NEXT: ld1b { z18.b }, p1/z, [x11, x8]
; CHECK-NEXT: ld1d { z19.d }, p0/z, [x17, #1, mul vl]
-; CHECK-NEXT: adds x10, x10, x9
+; CHECK-NEXT: subs x10, x10, x9
; CHECK-NEXT: add x8, x8, x13
; CHECK-NEXT: fcmla z1.d, p0/m, z7.d, z4.d, #0
; CHECK-NEXT: fcmla z0.d, p0/m, z16.d, z5.d, #0
diff --git a/llvm/test/CodeGen/AArch64/sve-vl-arith.ll b/llvm/test/CodeGen/AArch64/sve-vl-arith.ll
index dd4294c8d3bdcc..98d96da427c4f0 100644
--- a/llvm/test/CodeGen/AArch64/sve-vl-arith.ll
+++ b/llvm/test/CodeGen/AArch64/sve-vl-arith.ll
@@ -204,8 +204,7 @@ define i64 @dech_scalar_i64(i64 %a) {
; NO_SCALAR_INC-LABEL: dech_scalar_i64:
; NO_SCALAR_INC: // %bb.0:
; NO_SCALAR_INC-NEXT: cnth x8, all, mul #3
-; NO_SCALAR_INC-NEXT: neg x8, x8
-; NO_SCALAR_INC-NEXT: add x0, x0, x8
+; NO_SCALAR_INC-NEXT: sub x0, x0, x8
; NO_SCALAR_INC-NEXT: ret
;
; CHECK-LABEL: dech_scalar_i64:
@@ -222,8 +221,7 @@ define i64 @decw_scalar_i64(i64 %a) {
; NO_SCALAR_INC-LABEL: decw_scalar_i64:
; NO_SCALAR_INC: // %bb.0:
; NO_SCALAR_INC-NEXT: cntw x8, all, mul #3
-; NO_SCALAR_INC-NEXT: neg x8, x8
-; NO_SCALAR_INC-NEXT: add x0, x0, x8
+; NO_SCALAR_INC-NEXT: sub x0, x0, x8
; NO_SCALAR_INC-NEXT: ret
;
; CHECK-LABEL: decw_scalar_i64:
@@ -240,8 +238,7 @@ define i64 @decd_scalar_i64(i64 %a) {
; NO_SCALAR_INC-LABEL: decd_scalar_i64:
; NO_SCALAR_INC: // %bb.0:
; NO_SCALAR_INC-NEXT: cntd x8, all, mul #3
-; NO_SCALAR_INC-NEXT: neg x8, x8
-; NO_SCALAR_INC-NEXT: add x0, x0, x8
+; NO_SCALAR_INC-NEXT: sub x0, x0, x8
; NO_SCALAR_INC-NEXT: ret
;
; CHECK-LABEL: decd_scalar_i64:
@@ -367,8 +364,7 @@ define i32 @dech_scalar_i32(i32 %a) {
; NO_SCALAR_INC-LABEL: dech_scalar_i32:
; NO_SCALAR_INC: // %bb.0:
; NO_SCALAR_INC-NEXT: cnth x8
-; NO_SCALAR_INC-NEXT: neg x8, x8
-; NO_SCALAR_INC-NEXT: add w0, w0, w8
+; NO_SCALAR_INC-NEXT: sub w0, w0, w8
; NO_SCALAR_INC-NEXT: ret
;
; CHECK-LABEL: dech_scalar_i32:
@@ -389,8 +385,7 @@ define i32 @decw_scalar_i32(i32 %a) {
; NO_SCALAR_INC-LABEL: decw_scalar_i32:
; NO_SCALAR_INC: // %bb.0:
; NO_SCALAR_INC-NEXT: cntw x8
-; NO_SCALAR_INC-NEXT: neg x8, x8
-; NO_SCALAR_INC-NEXT: add w0, w0, w8
+; NO_SCALAR_INC-NEXT: sub w0, w0, w8
; NO_SCALAR_INC-NEXT: ret
;
; CHECK-LABEL: decw_scalar_i32:
@@ -411,8 +406,7 @@ define i32 @decd_scalar_i32(i32 %a) {
; NO_SCALAR_INC-LABEL: decd_scalar_i32:
; NO_SCALAR_INC: // %bb.0:
; NO_SCALAR_INC-NEXT: cntd x8
-; NO_SCALAR_INC-NEXT: neg x8, x8
-; NO_SCALAR_INC-NEXT: add w0, w0, w8
+; NO_SCALAR_INC-NEXT: sub w0, w0, w8
; NO_SCALAR_INC-NEXT: ret
;
; CHECK-LABEL: decd_scalar_i32:
diff --git a/llvm/test/CodeGen/AArch64/vscale-and-sve-cnt-demandedbits.ll b/llvm/test/CodeGen/AArch64/vscale-and-sve-cnt-demandedbits.ll
index dbdab799c83522..9572778484f8d3 100644
--- a/llvm/test/CodeGen/AArch64/vscale-and-sve-cnt-demandedbits.ll
+++ b/llvm/test/CodeGen/AArch64/vscale-and-sve-cnt-demandedbits.ll
@@ -194,7 +194,7 @@ define i32 @vscale_with_multiplier() vscale_range(1,16) {
; CHECK-LABEL: vscale_with_multiplier:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
-; CHECK-NEXT: mov w9, #5
+; CHECK-NEXT: mov w9, #5 // =0x5
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: and w9, w8, #0x3f
@@ -212,7 +212,7 @@ define i32 @vscale_with_negative_multiplier() vscale_range(1,16) {
; CHECK-LABEL: vscale_with_negative_multiplier:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
-; CHECK-NEXT: mov x9, #-5
+; CHECK-NEXT: mov x9, #-5 // =0xfffffffffffffffb
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: and w9, w8, #0xffffffc0
@@ -230,9 +230,9 @@ define i32 @pow2_vscale_with_negative_multiplier() vscale_range(1,16) {
; CHECK-LABEL: pow2_vscale_with_negative_multiplier:
; CHECK: // %bb.0:
; CHECK-NEXT: cntd x8
-; CHECK-NEXT: neg x8, x8
-; CHECK-NEXT: orr w9, w8, #0xfffffff0
-; CHECK-NEXT: add w0, w8, w9
+; CHECK-NEXT: neg x9, x8
+; CHECK-NEXT: orr w9, w9, #0xfffffff0
+; CHECK-NEXT: sub w0, w9, w8
; CHECK-NEXT: ret
%vscale = call i32 @llvm.vscale.i32()
%mul = mul i32 %vscale, -2
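The two lowering paths above can be compared by toggling the feature on the
llc command line; the exact RUN lines live in the test files, so treat this
invocation as a sketch rather than the tests' actual flags:

  # NO_SCALAR_INC path: negative vscale multiples select cnt[hwd] + sub.
  llc -mtriple=aarch64 -mattr=+sve,-use-scalar-inc-vl sve-vl-arith.ll -o -
  # use-scalar-inc-vl path: the dech/decw/decd forms are selected instead.
  llc -mtriple=aarch64 -mattr=+sve,+use-scalar-inc-vl sve-vl-arith.ll -o -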
>From f0326e27c0e26fe24526254cb3689c645a9fd971 Mon Sep 17 00:00:00 2001
From: zhongyunde 00443407 <zhongyunde at huawei.com>
Date: Tue, 23 Apr 2024 22:29:58 -0400
Subject: [PATCH 2/2] [CodeGen] fix comment
---
.../lib/Target/AArch64/AArch64SVEInstrInfo.td | 38 +++++++++----------
llvm/test/CodeGen/AArch64/sve-vl-arith.ll | 8 ++--
2 files changed, 21 insertions(+), 25 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index d50182cc8f1e4d..1ca474e563247b 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -2517,6 +2517,23 @@ let Predicates = [HasSVEorSME] in {
def : Pat<(vscale (sve_cntd_imm_neg i32:$imm)), (SUBXrs XZR, (CNTD_XPiI 31, $imm), 0)>;
}
+ // Add NoUseScalarIncVL to avoid conflicting with the patterns guarded by UseScalarIncVL
+ let Predicates = [HasSVEorSME, NoUseScalarIncVL] in {
+ def : Pat<(add GPR64:$op, (vscale (sve_cnth_imm_neg i32:$imm))),
+ (SUBXrs GPR64:$op, (CNTH_XPiI 31, $imm), 0)>;
+ def : Pat<(add GPR64:$op, (vscale (sve_cntw_imm_neg i32:$imm))),
+ (SUBXrs GPR64:$op, (CNTW_XPiI 31, $imm), 0)>;
+ def : Pat<(add GPR64:$op, (vscale (sve_cntd_imm_neg i32:$imm))),
+ (SUBXrs GPR64:$op, (CNTD_XPiI 31, $imm), 0)>;
+
+ def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_cnth_imm_neg i32:$imm))))),
+ (SUBSWrr GPR32:$op, (EXTRACT_SUBREG (CNTH_XPiI 31, $imm), sub_32))>;
+ def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_cntw_imm_neg i32:$imm))))),
+ (SUBSWrr GPR32:$op, (EXTRACT_SUBREG (CNTW_XPiI 31, $imm), sub_32))>;
+ def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_cntd_imm_neg i32:$imm))))),
+ (SUBSWrr GPR32:$op, (EXTRACT_SUBREG (CNTD_XPiI 31, $imm), sub_32))>;
+ }
+
let AddedComplexity = 5 in {
def : Pat<(nxv8i16 (add ZPR:$op, (nxv8i16 (splat_vector (i32 (trunc (vscale (sve_cnth_imm i32:$imm)))))))),
(INCH_ZPiI ZPR:$op, 31, $imm)>;
@@ -2583,27 +2600,6 @@ let Predicates = [HasSVEorSME] in {
sub_32)>;
}
- // Add NoUseScalarIncVL to avoid conflicting with the patterns guarded by UseScalarIncVL
- let Predicates = [NoUseScalarIncVL] in {
- def : Pat<(add GPR64:$op, (vscale (sve_rdvl_imm i32:$imm))),
- (ADDXrs GPR64:$op, (RDVLI_XI $imm), 0)>;
- def : Pat<(add GPR64:$op, (vscale (sve_cnth_imm_neg i32:$imm))),
- (SUBXrs GPR64:$op, (CNTH_XPiI 31, $imm), 0)>;
- def : Pat<(add GPR64:$op, (vscale (sve_cntw_imm_neg i32:$imm))),
- (SUBXrs GPR64:$op, (CNTW_XPiI 31, $imm), 0)>;
- def : Pat<(add GPR64:$op, (vscale (sve_cntd_imm_neg i32:$imm))),
- (SUBXrs GPR64:$op, (CNTD_XPiI 31, $imm), 0)>;
-
- def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_rdvl_imm i32:$imm))))),
- (ADDSWrr GPR32:$op, (EXTRACT_SUBREG (RDVLI_XI $imm), sub_32))>;
- def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_cnth_imm_neg i32:$imm))))),
- (SUBSWrr GPR32:$op, (EXTRACT_SUBREG (CNTH_XPiI 31, $imm), sub_32))>;
- def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_cntw_imm_neg i32:$imm))))),
- (SUBSWrr GPR32:$op, (EXTRACT_SUBREG (CNTW_XPiI 31, $imm), sub_32))>;
- def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_cntd_imm_neg i32:$imm))))),
- (SUBSWrr GPR32:$op, (EXTRACT_SUBREG (CNTD_XPiI 31, $imm), sub_32))>;
- }
-
// FIXME: BigEndian requires an additional REV instruction to satisfy the
// constraint that none of the bits change when stored to memory as one
// type, and reloaded as another type.
diff --git a/llvm/test/CodeGen/AArch64/sve-vl-arith.ll b/llvm/test/CodeGen/AArch64/sve-vl-arith.ll
index 98d96da427c4f0..de2af590acd1e2 100644
--- a/llvm/test/CodeGen/AArch64/sve-vl-arith.ll
+++ b/llvm/test/CodeGen/AArch64/sve-vl-arith.ll
@@ -186,8 +186,8 @@ define i64 @incd_scalar_i64(i64 %a) {
define i64 @decb_scalar_i64(i64 %a) {
; NO_SCALAR_INC-LABEL: decb_scalar_i64:
; NO_SCALAR_INC: // %bb.0:
-; NO_SCALAR_INC-NEXT: rdvl x8, #-2
-; NO_SCALAR_INC-NEXT: add x0, x0, x8
+; NO_SCALAR_INC-NEXT: cnth x8, all, mul #4
+; NO_SCALAR_INC-NEXT: sub x0, x0, x8
; NO_SCALAR_INC-NEXT: ret
;
; CHECK-LABEL: decb_scalar_i64:
@@ -342,8 +342,8 @@ define i32 @incd_scalar_i32(i32 %a) {
define i32 @decb_scalar_i32(i32 %a) {
; NO_SCALAR_INC-LABEL: decb_scalar_i32:
; NO_SCALAR_INC: // %bb.0:
-; NO_SCALAR_INC-NEXT: rdvl x8, #-4
-; NO_SCALAR_INC-NEXT: add w0, w0, w8
+; NO_SCALAR_INC-NEXT: cnth x8, all, mul #8
+; NO_SCALAR_INC-NEXT: sub w0, w0, w8
; NO_SCALAR_INC-NEXT: ret
;
; CHECK-LABEL: decb_scalar_i32: