[llvm] [AArch64][CodeGen] Add patterns for small negative VScale const (PR #89607)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 22 07:13:44 PDT 2024
https://github.com/vfdff updated https://github.com/llvm/llvm-project/pull/89607
>From 5c4a6a315bd494bb830cb58a060dd15284ca15e6 Mon Sep 17 00:00:00 2001
From: zhongyunde 00443407 <zhongyunde at huawei.com>
Date: Tue, 9 Apr 2024 05:25:16 -0400
Subject: [PATCH] [AArch64][CodeGen] Add patterns for small negative VScale
const
On AArch64, rdvl can accept a negative value, while cntd/cntw/cnth can't.
Since we do support VScale with a negative multiplier, we do not restrict
negative values and instead take the hit of having the extra patterns.
Also add NoUseScalarIncVL to avoid affecting the patterns that work for -mattr=+use-scalar-inc-vl.
Fix https://github.com/llvm/llvm-project/issues/84620
---
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 2 ++
.../lib/Target/AArch64/AArch64SVEInstrInfo.td | 21 +++++++++++++++++++
...plex-deinterleaving-reductions-scalable.ll | 6 +++---
llvm/test/CodeGen/AArch64/sve-vl-arith.ll | 18 ++++++----------
.../vscale-and-sve-cnt-demandedbits.ll | 10 ++++-----
5 files changed, 37 insertions(+), 20 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index b1f514f75207f0..279e3aedcc58d8 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -310,6 +310,8 @@ def UseNegativeImmediates
def UseScalarIncVL : Predicate<"Subtarget->useScalarIncVL()">;
+def NoUseScalarIncVL : Predicate<"!Subtarget->useScalarIncVL()">;
+
def UseSVEFPLD1R : Predicate<"!Subtarget->noSVEFPLD1R()">;
def IsNeonAvailable : Predicate<"Subtarget->isNeonAvailable()">;
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index dd5e11c0f5e35d..24800a42fb4acc 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -2562,6 +2562,27 @@ let Predicates = [HasSVEorSME] in {
sub_32)>;
}
+ // Require NoUseScalarIncVL so these patterns do not affect the ones used with UseScalarIncVL
+ let Predicates = [NoUseScalarIncVL] in {
+ def : Pat<(add GPR64:$op, (vscale (sve_rdvl_imm i32:$imm))),
+ (ADDXrs GPR64:$op, (RDVLI_XI $imm), 0)>;
+ def : Pat<(add GPR64:$op, (vscale (sve_cnth_imm_neg i32:$imm))),
+ (SUBXrs GPR64:$op, (CNTH_XPiI 31, $imm), 0)>;
+ def : Pat<(add GPR64:$op, (vscale (sve_cntw_imm_neg i32:$imm))),
+ (SUBXrs GPR64:$op, (CNTW_XPiI 31, $imm), 0)>;
+ def : Pat<(add GPR64:$op, (vscale (sve_cntd_imm_neg i32:$imm))),
+ (SUBXrs GPR64:$op, (CNTD_XPiI 31, $imm), 0)>;
+
+ def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_rdvl_imm i32:$imm))))),
+ (ADDSWrr GPR32:$op, (EXTRACT_SUBREG (RDVLI_XI $imm), sub_32))>;
+ def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_cnth_imm_neg i32:$imm))))),
+ (SUBSWrr GPR32:$op, (EXTRACT_SUBREG (CNTH_XPiI 31, $imm), sub_32))>;
+ def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_cntw_imm_neg i32:$imm))))),
+ (SUBSWrr GPR32:$op, (EXTRACT_SUBREG (CNTW_XPiI 31, $imm), sub_32))>;
+ def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_cntd_imm_neg i32:$imm))))),
+ (SUBSWrr GPR32:$op, (EXTRACT_SUBREG (CNTD_XPiI 31, $imm), sub_32))>;
+ }
+
// FIXME: BigEndian requires an additional REV instruction to satisfy the
// constraint that none of the bits change when stored to memory as one
// type, and reloaded as another type.
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll
index 1696ac8709d406..2d9f939f0e6664 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll
@@ -33,7 +33,7 @@ define %"class.std::complex" @complex_mul_v2f64(ptr %a, ptr %b) {
; CHECK-NEXT: ld1d { z3.d }, p0/z, [x12, #1, mul vl]
; CHECK-NEXT: ld1b { z4.b }, p1/z, [x1, x8]
; CHECK-NEXT: ld1d { z5.d }, p0/z, [x13, #1, mul vl]
-; CHECK-NEXT: adds x10, x10, x9
+; CHECK-NEXT: subs x10, x10, x9
; CHECK-NEXT: add x8, x8, x11
; CHECK-NEXT: fcmla z1.d, p0/m, z4.d, z2.d, #0
; CHECK-NEXT: fcmla z0.d, p0/m, z5.d, z3.d, #0
@@ -125,7 +125,7 @@ define %"class.std::complex" @complex_mul_nonzero_init_v2f64(ptr %a, ptr %b) {
; CHECK-NEXT: ld1d { z3.d }, p0/z, [x12, #1, mul vl]
; CHECK-NEXT: ld1b { z4.b }, p1/z, [x1, x8]
; CHECK-NEXT: ld1d { z5.d }, p0/z, [x13, #1, mul vl]
-; CHECK-NEXT: adds x10, x10, x9
+; CHECK-NEXT: subs x10, x10, x9
; CHECK-NEXT: add x8, x8, x11
; CHECK-NEXT: fcmla z1.d, p0/m, z4.d, z2.d, #0
; CHECK-NEXT: fcmla z0.d, p0/m, z5.d, z3.d, #0
@@ -219,7 +219,7 @@ define %"class.std::complex" @complex_mul_v2f64_unrolled(ptr %a, ptr %b) {
; CHECK-NEXT: ld1d { z17.d }, p0/z, [x15, #1, mul vl]
; CHECK-NEXT: ld1b { z18.b }, p1/z, [x11, x8]
; CHECK-NEXT: ld1d { z19.d }, p0/z, [x17, #1, mul vl]
-; CHECK-NEXT: adds x10, x10, x9
+; CHECK-NEXT: subs x10, x10, x9
; CHECK-NEXT: add x8, x8, x13
; CHECK-NEXT: fcmla z1.d, p0/m, z7.d, z4.d, #0
; CHECK-NEXT: fcmla z0.d, p0/m, z16.d, z5.d, #0
diff --git a/llvm/test/CodeGen/AArch64/sve-vl-arith.ll b/llvm/test/CodeGen/AArch64/sve-vl-arith.ll
index dd4294c8d3bdcc..98d96da427c4f0 100644
--- a/llvm/test/CodeGen/AArch64/sve-vl-arith.ll
+++ b/llvm/test/CodeGen/AArch64/sve-vl-arith.ll
@@ -204,8 +204,7 @@ define i64 @dech_scalar_i64(i64 %a) {
; NO_SCALAR_INC-LABEL: dech_scalar_i64:
; NO_SCALAR_INC: // %bb.0:
; NO_SCALAR_INC-NEXT: cnth x8, all, mul #3
-; NO_SCALAR_INC-NEXT: neg x8, x8
-; NO_SCALAR_INC-NEXT: add x0, x0, x8
+; NO_SCALAR_INC-NEXT: sub x0, x0, x8
; NO_SCALAR_INC-NEXT: ret
;
; CHECK-LABEL: dech_scalar_i64:
@@ -222,8 +221,7 @@ define i64 @decw_scalar_i64(i64 %a) {
; NO_SCALAR_INC-LABEL: decw_scalar_i64:
; NO_SCALAR_INC: // %bb.0:
; NO_SCALAR_INC-NEXT: cntw x8, all, mul #3
-; NO_SCALAR_INC-NEXT: neg x8, x8
-; NO_SCALAR_INC-NEXT: add x0, x0, x8
+; NO_SCALAR_INC-NEXT: sub x0, x0, x8
; NO_SCALAR_INC-NEXT: ret
;
; CHECK-LABEL: decw_scalar_i64:
@@ -240,8 +238,7 @@ define i64 @decd_scalar_i64(i64 %a) {
; NO_SCALAR_INC-LABEL: decd_scalar_i64:
; NO_SCALAR_INC: // %bb.0:
; NO_SCALAR_INC-NEXT: cntd x8, all, mul #3
-; NO_SCALAR_INC-NEXT: neg x8, x8
-; NO_SCALAR_INC-NEXT: add x0, x0, x8
+; NO_SCALAR_INC-NEXT: sub x0, x0, x8
; NO_SCALAR_INC-NEXT: ret
;
; CHECK-LABEL: decd_scalar_i64:
@@ -367,8 +364,7 @@ define i32 @dech_scalar_i32(i32 %a) {
; NO_SCALAR_INC-LABEL: dech_scalar_i32:
; NO_SCALAR_INC: // %bb.0:
; NO_SCALAR_INC-NEXT: cnth x8
-; NO_SCALAR_INC-NEXT: neg x8, x8
-; NO_SCALAR_INC-NEXT: add w0, w0, w8
+; NO_SCALAR_INC-NEXT: sub w0, w0, w8
; NO_SCALAR_INC-NEXT: ret
;
; CHECK-LABEL: dech_scalar_i32:
@@ -389,8 +385,7 @@ define i32 @decw_scalar_i32(i32 %a) {
; NO_SCALAR_INC-LABEL: decw_scalar_i32:
; NO_SCALAR_INC: // %bb.0:
; NO_SCALAR_INC-NEXT: cntw x8
-; NO_SCALAR_INC-NEXT: neg x8, x8
-; NO_SCALAR_INC-NEXT: add w0, w0, w8
+; NO_SCALAR_INC-NEXT: sub w0, w0, w8
; NO_SCALAR_INC-NEXT: ret
;
; CHECK-LABEL: decw_scalar_i32:
@@ -411,8 +406,7 @@ define i32 @decd_scalar_i32(i32 %a) {
; NO_SCALAR_INC-LABEL: decd_scalar_i32:
; NO_SCALAR_INC: // %bb.0:
; NO_SCALAR_INC-NEXT: cntd x8
-; NO_SCALAR_INC-NEXT: neg x8, x8
-; NO_SCALAR_INC-NEXT: add w0, w0, w8
+; NO_SCALAR_INC-NEXT: sub w0, w0, w8
; NO_SCALAR_INC-NEXT: ret
;
; CHECK-LABEL: decd_scalar_i32:
diff --git a/llvm/test/CodeGen/AArch64/vscale-and-sve-cnt-demandedbits.ll b/llvm/test/CodeGen/AArch64/vscale-and-sve-cnt-demandedbits.ll
index dbdab799c83522..9572778484f8d3 100644
--- a/llvm/test/CodeGen/AArch64/vscale-and-sve-cnt-demandedbits.ll
+++ b/llvm/test/CodeGen/AArch64/vscale-and-sve-cnt-demandedbits.ll
@@ -194,7 +194,7 @@ define i32 @vscale_with_multiplier() vscale_range(1,16) {
; CHECK-LABEL: vscale_with_multiplier:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
-; CHECK-NEXT: mov w9, #5
+; CHECK-NEXT: mov w9, #5 // =0x5
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: and w9, w8, #0x3f
@@ -212,7 +212,7 @@ define i32 @vscale_with_negative_multiplier() vscale_range(1,16) {
; CHECK-LABEL: vscale_with_negative_multiplier:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
-; CHECK-NEXT: mov x9, #-5
+; CHECK-NEXT: mov x9, #-5 // =0xfffffffffffffffb
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: and w9, w8, #0xffffffc0
@@ -230,9 +230,9 @@ define i32 @pow2_vscale_with_negative_multiplier() vscale_range(1,16) {
; CHECK-LABEL: pow2_vscale_with_negative_multiplier:
; CHECK: // %bb.0:
; CHECK-NEXT: cntd x8
-; CHECK-NEXT: neg x8, x8
-; CHECK-NEXT: orr w9, w8, #0xfffffff0
-; CHECK-NEXT: add w0, w8, w9
+; CHECK-NEXT: neg x9, x8
+; CHECK-NEXT: orr w9, w9, #0xfffffff0
+; CHECK-NEXT: sub w0, w9, w8
; CHECK-NEXT: ret
%vscale = call i32 @llvm.vscale.i32()
%mul = mul i32 %vscale, -2
More information about the llvm-commits
mailing list