[llvm] [AArch64] Add patterns for sub from add negative immediates (PR #156024)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 29 06:27:27 PDT 2025
https://github.com/davemgreen updated https://github.com/llvm/llvm-project/pull/156024
>From 4d1bd39e0b1232cf736ba9e30fbe44eeaf476c34 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Fri, 29 Aug 2025 10:25:10 +0100
Subject: [PATCH 1/3] [AArch64] Add -ve -> sub via isel
---
.../Target/AArch64/AArch64ISelLowering.cpp | 9 +++++++
.../sve-fixed-length-addressing-modes.ll | 6 +++--
.../AArch64/sve-fixed-length-bitselect.ll | 14 +++++------
.../AArch64/sve-fixed-length-build-vector.ll | 3 ++-
.../AArch64/sve-index-const-step-vector.ll | 3 ++-
llvm/test/CodeGen/AArch64/sve-int-arith.ll | 17 +++++++------
llvm/test/CodeGen/AArch64/sve-int-imm.ll | 23 +++++++-----------
.../CodeGen/AArch64/sve-intrinsics-index.ll | 24 +++++++++++--------
...e-streaming-mode-fixed-length-bitselect.ll | 23 +++++++++---------
...treaming-mode-fixed-length-build-vector.ll | 13 +++++-----
llvm/test/CodeGen/AArch64/sve2-bsl.ll | 9 ++++---
11 files changed, 79 insertions(+), 65 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 23328ed57fb36..da65c315a2d25 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -21813,6 +21813,15 @@ static SDValue performAddSubCombine(SDNode *N,
if (SDValue Val = performExtBinopLoadFold(N, DCI.DAG))
return Val;
+ APInt Imm;
+ if (N->getValueType(0).isScalableVector() &&
+ ISD::isConstantSplatVector(N->getOperand(1).getNode(), Imm) &&
+ Imm.isNegative() && ((-Imm & ~0xff) == 0 || (-Imm & ~0xff00) == 0))
+ return DCI.DAG.getNode(
+ N->getOpcode() == ISD::ADD ? ISD::SUB : ISD::ADD, SDLoc(N),
+ N->getValueType(0), N->getOperand(0),
+ DCI.DAG.getConstant(-Imm, SDLoc(N), N->getValueType(0)));
+
return performAddSubLongCombine(N, DCI);
}
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-addressing-modes.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-addressing-modes.ll
index 7ccf899c70e31..019089812805f 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-addressing-modes.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-addressing-modes.ll
@@ -23,7 +23,8 @@ define void @masked_gather_base_plus_stride_v4f64(ptr %dst, ptr %src) #0 {
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #-32 // =0xffffffffffffffe0
; CHECK-NEXT: ptrue p0.d, vl4
-; CHECK-NEXT: index z0.d, #-2, x8
+; CHECK-NEXT: index z0.d, #0, x8
+; CHECK-NEXT: sub z0.d, z0.d, #2 // =0x2
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x1, z0.d, lsl #3]
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
@@ -50,9 +51,10 @@ define void @masked_scatter_base_plus_stride_v8f32(ptr %dst, ptr %src) #0 {
define void @masked_scatter_base_plus_stride_v4f64(ptr %dst, ptr %src) #0 {
; CHECK-LABEL: masked_scatter_base_plus_stride_v4f64:
; CHECK: // %bb.0:
+; CHECK-NEXT: index z0.d, #0, #3
; CHECK-NEXT: ptrue p0.d, vl4
-; CHECK-NEXT: index z0.d, #-2, #3
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
+; CHECK-NEXT: sub z0.d, z0.d, #2 // =0x2
; CHECK-NEXT: st1d { z1.d }, p0, [x0, z0.d, lsl #3]
; CHECK-NEXT: ret
%data = load <4 x double>, ptr %src, align 8
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-bitselect.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-bitselect.ll
index fb494afa11de2..258e399018ba8 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-bitselect.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-bitselect.ll
@@ -13,15 +13,15 @@ define void @fixed_bitselect_v8i32(ptr %pre_cond_ptr, ptr %left_ptr, ptr %right_
; CHECK-LABEL: fixed_bitselect_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
-; CHECK-NEXT: mov z1.s, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
-; CHECK-NEXT: ld1w { z2.s }, p0/z, [x1]
+; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT: ld1w { z3.s }, p0/z, [x2]
-; CHECK-NEXT: add z1.s, z0.s, z1.s
-; CHECK-NEXT: subr z0.s, z0.s, #0 // =0x0
-; CHECK-NEXT: and z0.d, z0.d, z2.d
-; CHECK-NEXT: and z1.d, z1.d, z3.d
-; CHECK-NEXT: orr z0.d, z1.d, z0.d
+; CHECK-NEXT: mov z2.d, z0.d
+; CHECK-NEXT: sub z0.s, z0.s, #1 // =0x1
+; CHECK-NEXT: subr z2.s, z2.s, #0 // =0x0
+; CHECK-NEXT: and z0.d, z0.d, z3.d
+; CHECK-NEXT: and z1.d, z2.d, z1.d
+; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: st1w { z0.s }, p0, [x3]
; CHECK-NEXT: ret
%pre_cond = load <8 x i32>, ptr %pre_cond_ptr
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-build-vector.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-build-vector.ll
index 47fda39d84001..0a918a8160137 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-build-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-build-vector.ll
@@ -44,7 +44,8 @@ define void @build_vector_minus2_dec32_v4i64(ptr %a) #0 {
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #-32 // =0xffffffffffffffe0
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
-; VBITS_GE_256-NEXT: index z0.d, #-2, x8
+; VBITS_GE_256-NEXT: index z0.d, #0, x8
+; VBITS_GE_256-NEXT: sub z0.d, z0.d, #2 // =0x2
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_256-NEXT: ret
store <4 x i64> <i64 -2, i64 -34, i64 -66, i64 -98>, ptr %a, align 8
diff --git a/llvm/test/CodeGen/AArch64/sve-index-const-step-vector.ll b/llvm/test/CodeGen/AArch64/sve-index-const-step-vector.ll
index 433ddbd4a261b..de3287587c200 100644
--- a/llvm/test/CodeGen/AArch64/sve-index-const-step-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-index-const-step-vector.ll
@@ -84,7 +84,8 @@ define <4 x i32> @v4i32_non_zero_non_one() #0 {
define <4 x i32> @v4i32_neg_immediates() #0 {
; CHECK-LABEL: v4i32_neg_immediates:
; CHECK: // %bb.0:
-; CHECK-NEXT: index z0.s, #-1, #-2
+; CHECK-NEXT: index z0.s, #0, #-2
+; CHECK-NEXT: sub z0.s, z0.s, #1 // =0x1
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
ret <4 x i32> <i32 -1, i32 -3, i32 -5, i32 -7>
diff --git a/llvm/test/CodeGen/AArch64/sve-int-arith.ll b/llvm/test/CodeGen/AArch64/sve-int-arith.ll
index c59b1d430ff4f..28fa00274496c 100644
--- a/llvm/test/CodeGen/AArch64/sve-int-arith.ll
+++ b/llvm/test/CodeGen/AArch64/sve-int-arith.ll
@@ -598,9 +598,9 @@ define <vscale x 8 x i16> @muladd_i16_positiveAddend(<vscale x 8 x i16> %a, <vsc
define <vscale x 8 x i16> @muladd_i16_negativeAddend(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
; CHECK-LABEL: muladd_i16_negativeAddend:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z2.h, #-255 // =0xffffffffffffff01
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: mad z0.h, p0/m, z1.h, z2.h
+; CHECK-NEXT: mul z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: sub z0.h, z0.h, #255 // =0xff
; CHECK-NEXT: ret
{
%1 = mul <vscale x 8 x i16> %a, %b
@@ -624,9 +624,9 @@ define <vscale x 16 x i8> @muladd_i8_positiveAddend(<vscale x 16 x i8> %a, <vsca
define <vscale x 16 x i8> @muladd_i8_negativeAddend(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
; CHECK-LABEL: muladd_i8_negativeAddend:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z2.b, #-15 // =0xfffffffffffffff1
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: mad z0.b, p0/m, z1.b, z2.b
+; CHECK-NEXT: mul z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: sub z0.b, z0.b, #15 // =0xf
; CHECK-NEXT: ret
{
%1 = mul <vscale x 16 x i8> %a, %b
@@ -707,10 +707,9 @@ define <vscale x 8 x i16> @mulsub_i16_positiveAddend(<vscale x 8 x i16> %a, <vsc
define <vscale x 8 x i16> @mulsub_i16_negativeAddend(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
; CHECK-LABEL: mulsub_i16_negativeAddend:
; CHECK: // %bb.0:
+; CHECK-NEXT: mov z2.h, #255 // =0xff
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: mul z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: mov z1.h, #-255 // =0xffffffffffffff01
-; CHECK-NEXT: sub z0.h, z0.h, z1.h
+; CHECK-NEXT: mad z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT: ret
{
%1 = mul <vscale x 8 x i16> %a, %b
@@ -734,9 +733,9 @@ define <vscale x 16 x i8> @mulsub_i8_positiveAddend(<vscale x 16 x i8> %a, <vsca
define <vscale x 16 x i8> @mulsub_i8_negativeAddend(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
; CHECK-LABEL: mulsub_i8_negativeAddend:
; CHECK: // %bb.0:
+; CHECK-NEXT: mov z2.b, #15 // =0xf
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: mul z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: sub z0.b, z0.b, #241 // =0xf1
+; CHECK-NEXT: mad z0.b, p0/m, z1.b, z2.b
; CHECK-NEXT: ret
{
%1 = mul <vscale x 16 x i8> %a, %b
diff --git a/llvm/test/CodeGen/AArch64/sve-int-imm.ll b/llvm/test/CodeGen/AArch64/sve-int-imm.ll
index e34f4840f517c..1baa47af23af5 100644
--- a/llvm/test/CodeGen/AArch64/sve-int-imm.ll
+++ b/llvm/test/CodeGen/AArch64/sve-int-imm.ll
@@ -73,7 +73,7 @@ define <vscale x 2 x i64> @add_i64_high(<vscale x 2 x i64> %a) {
define <vscale x 16 x i8> @add_i8_signedness(<vscale x 16 x i8> %a) {
; CHECK-LABEL: add_i8_signedness:
; CHECK: // %bb.0:
-; CHECK-NEXT: add z0.b, z0.b, #255 // =0xff
+; CHECK-NEXT: sub z0.b, z0.b, #1 // =0x1
; CHECK-NEXT: ret
%res = add <vscale x 16 x i8> %a, splat(i8 255)
ret <vscale x 16 x i8> %res
@@ -82,7 +82,7 @@ define <vscale x 16 x i8> @add_i8_signedness(<vscale x 16 x i8> %a) {
define <vscale x 8 x i16> @add_i16_signedness(<vscale x 8 x i16> %a) {
; CHECK-LABEL: add_i16_signedness:
; CHECK: // %bb.0:
-; CHECK-NEXT: add z0.h, z0.h, #65280 // =0xff00
+; CHECK-NEXT: sub z0.h, z0.h, #256 // =0x100
; CHECK-NEXT: ret
%res = add <vscale x 8 x i16> %a, splat(i16 65280)
ret <vscale x 8 x i16> %res
@@ -220,7 +220,7 @@ define <vscale x 2 x i64> @sub_i64_high(<vscale x 2 x i64> %a) {
define <vscale x 16 x i8> @addnve_i8_low(<vscale x 16 x i8> %a) {
; CHECK-LABEL: addnve_i8_low:
; CHECK: // %bb.0:
-; CHECK-NEXT: add z0.b, z0.b, #226 // =0xe2
+; CHECK-NEXT: sub z0.b, z0.b, #30 // =0x1e
; CHECK-NEXT: ret
%res = add <vscale x 16 x i8> %a, splat(i8 -30)
ret <vscale x 16 x i8> %res
@@ -229,8 +229,7 @@ define <vscale x 16 x i8> @addnve_i8_low(<vscale x 16 x i8> %a) {
define <vscale x 8 x i16> @addnve_i16_low(<vscale x 8 x i16> %a) {
; CHECK-LABEL: addnve_i16_low:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z1.h, #-30 // =0xffffffffffffffe2
-; CHECK-NEXT: add z0.h, z0.h, z1.h
+; CHECK-NEXT: sub z0.h, z0.h, #30 // =0x1e
; CHECK-NEXT: ret
%res = add <vscale x 8 x i16> %a, splat(i16 -30)
ret <vscale x 8 x i16> %res
@@ -239,7 +238,7 @@ define <vscale x 8 x i16> @addnve_i16_low(<vscale x 8 x i16> %a) {
define <vscale x 8 x i16> @addnve_i16_high(<vscale x 8 x i16> %a) {
; CHECK-LABEL: addnve_i16_high:
; CHECK: // %bb.0:
-; CHECK-NEXT: add z0.h, z0.h, #64512 // =0xfc00
+; CHECK-NEXT: sub z0.h, z0.h, #1024 // =0x400
; CHECK-NEXT: ret
%res = add <vscale x 8 x i16> %a, splat(i16 -1024)
ret <vscale x 8 x i16> %res
@@ -248,8 +247,7 @@ define <vscale x 8 x i16> @addnve_i16_high(<vscale x 8 x i16> %a) {
define <vscale x 4 x i32> @addnve_i32_low(<vscale x 4 x i32> %a) {
; CHECK-LABEL: addnve_i32_low:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z1.s, #-30 // =0xffffffffffffffe2
-; CHECK-NEXT: add z0.s, z0.s, z1.s
+; CHECK-NEXT: sub z0.s, z0.s, #30 // =0x1e
; CHECK-NEXT: ret
%res = add <vscale x 4 x i32> %a, splat(i32 -30)
ret <vscale x 4 x i32> %res
@@ -258,8 +256,7 @@ define <vscale x 4 x i32> @addnve_i32_low(<vscale x 4 x i32> %a) {
define <vscale x 4 x i32> @addnve_i32_high(<vscale x 4 x i32> %a) {
; CHECK-LABEL: addnve_i32_high:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z1.s, #-1024 // =0xfffffffffffffc00
-; CHECK-NEXT: add z0.s, z0.s, z1.s
+; CHECK-NEXT: sub z0.s, z0.s, #1024 // =0x400
; CHECK-NEXT: ret
%res = add <vscale x 4 x i32> %a, splat(i32 -1024)
ret <vscale x 4 x i32> %res
@@ -268,8 +265,7 @@ define <vscale x 4 x i32> @addnve_i32_high(<vscale x 4 x i32> %a) {
define <vscale x 2 x i64> @addnve_i64_low(<vscale x 2 x i64> %a) {
; CHECK-LABEL: addnve_i64_low:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z1.d, #-30 // =0xffffffffffffffe2
-; CHECK-NEXT: add z0.d, z0.d, z1.d
+; CHECK-NEXT: sub z0.d, z0.d, #30 // =0x1e
; CHECK-NEXT: ret
%res = add <vscale x 2 x i64> %a, splat(i64 -30)
ret <vscale x 2 x i64> %res
@@ -278,8 +274,7 @@ define <vscale x 2 x i64> @addnve_i64_low(<vscale x 2 x i64> %a) {
define <vscale x 2 x i64> @addnve_i64_high(<vscale x 2 x i64> %a) {
; CHECK-LABEL: addnve_i64_high:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z1.d, #-1024 // =0xfffffffffffffc00
-; CHECK-NEXT: add z0.d, z0.d, z1.d
+; CHECK-NEXT: sub z0.d, z0.d, #1024 // =0x400
; CHECK-NEXT: ret
%res = add <vscale x 2 x i64> %a, splat(i64 -1024)
ret <vscale x 2 x i64> %res
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-index.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-index.ll
index 4d4b1b67bbafc..0d4feced41181 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-index.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-index.ll
@@ -8,7 +8,8 @@
define <vscale x 16 x i8> @index_ii_i8() {
; CHECK-LABEL: index_ii_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: index z0.b, #-16, #15
+; CHECK-NEXT: index z0.b, #0, #15
+; CHECK-NEXT: sub z0.b, z0.b, #16 // =0x10
; CHECK-NEXT: ret
%out = call <vscale x 16 x i8> @llvm.aarch64.sve.index.nxv16i8(i8 -16, i8 15)
ret <vscale x 16 x i8> %out
@@ -26,7 +27,8 @@ define <vscale x 8 x i16> @index_ii_i16() {
define <vscale x 4 x i32> @index_ii_i32() {
; CHECK-LABEL: index_ii_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: index z0.s, #-16, #15
+; CHECK-NEXT: index z0.s, #0, #15
+; CHECK-NEXT: sub z0.s, z0.s, #16 // =0x10
; CHECK-NEXT: ret
%out = call <vscale x 4 x i32> @llvm.aarch64.sve.index.nxv4i32(i32 -16, i32 15)
ret <vscale x 4 x i32> %out
@@ -45,8 +47,8 @@ define <vscale x 2 x i64> @index_ii_range() {
; CHECK-LABEL: index_ii_range:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #16 // =0x10
-; CHECK-NEXT: mov x9, #-17 // =0xffffffffffffffef
-; CHECK-NEXT: index z0.d, x9, x8
+; CHECK-NEXT: index z0.d, #0, x8
+; CHECK-NEXT: sub z0.d, z0.d, #17 // =0x11
; CHECK-NEXT: ret
%out = call <vscale x 2 x i64> @llvm.aarch64.sve.index.nxv2i64(i64 -17, i64 16)
ret <vscale x 2 x i64> %out
@@ -60,7 +62,7 @@ define <vscale x 8 x i16> @index_ii_range_combine(i16 %a) {
; CHECK-NEXT: ret
%val2 = call <vscale x 8 x i16> @llvm.aarch64.sve.index.nxv8i16(i16 0, i16 2)
%val3 = shl <vscale x 8 x i16> %val2, splat(i16 2)
- %out = add <vscale x 8 x i16> %val3, splat(i16 2)
+ %out = add <vscale x 8 x i16> %val3, splat(i16 2)
ret <vscale x 8 x i16> %out
}
@@ -80,7 +82,8 @@ define <vscale x 16 x i8> @index_ir_i8(i8 %a) {
define <vscale x 8 x i16> @index_ir_i16(i16 %a) {
; CHECK-LABEL: index_ir_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: index z0.h, #-16, w0
+; CHECK-NEXT: index z0.h, #0, w0
+; CHECK-NEXT: sub z0.h, z0.h, #16 // =0x10
; CHECK-NEXT: ret
%out = call <vscale x 8 x i16> @llvm.aarch64.sve.index.nxv8i16(i16 -16, i16 %a)
ret <vscale x 8 x i16> %out
@@ -98,7 +101,8 @@ define <vscale x 4 x i32> @index_ir_i32(i32 %a) {
define <vscale x 2 x i64> @index_ir_i64(i64 %a) {
; CHECK-LABEL: index_ir_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: index z0.d, #-16, x0
+; CHECK-NEXT: index z0.d, #0, x0
+; CHECK-NEXT: sub z0.d, z0.d, #16 // =0x10
; CHECK-NEXT: ret
%out = call <vscale x 2 x i64> @llvm.aarch64.sve.index.nxv2i64(i64 -16, i64 %a)
ret <vscale x 2 x i64> %out
@@ -107,8 +111,8 @@ define <vscale x 2 x i64> @index_ir_i64(i64 %a) {
define <vscale x 4 x i32> @index_ir_range(i32 %a) {
; CHECK-LABEL: index_ir_range:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #-17 // =0xffffffef
-; CHECK-NEXT: index z0.s, w8, w0
+; CHECK-NEXT: index z0.s, #0, w0
+; CHECK-NEXT: sub z0.s, z0.s, #17 // =0x11
; CHECK-NEXT: ret
%out = call <vscale x 4 x i32> @llvm.aarch64.sve.index.nxv4i32(i32 -17, i32 %a)
ret <vscale x 4 x i32> %out
@@ -120,7 +124,7 @@ define <vscale x 4 x i32> @index_ir_range_combine(i32 %a) {
; CHECK-NEXT: index z0.s, #0, w0
; CHECK-NEXT: ret
%tmp = call <vscale x 4 x i32> @llvm.aarch64.sve.index.nxv4i32(i32 2, i32 1)
- %tmp1 = sub <vscale x 4 x i32> %tmp, splat(i32 2)
+ %tmp1 = sub <vscale x 4 x i32> %tmp, splat(i32 2)
%val2 = insertelement <vscale x 4 x i32> poison, i32 %a, i32 0
%val3 = shufflevector <vscale x 4 x i32> %val2, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
%out = mul <vscale x 4 x i32> %tmp1, %val3
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll
index d29e43509dfe9..71396da004002 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll
@@ -14,20 +14,21 @@ target triple = "aarch64"
define <8 x i32> @fixed_bitselect_v8i32(ptr %pre_cond_ptr, ptr %left_ptr, ptr %right_ptr) {
; CHECK-LABEL: fixed_bitselect_v8i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.s, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: ldp q2, q1, [x0]
+; CHECK-NEXT: ldp q1, q0, [x0]
; CHECK-NEXT: ldp q5, q4, [x1]
; CHECK-NEXT: ldp q6, q7, [x2]
-; CHECK-NEXT: add z3.s, z1.s, z0.s
-; CHECK-NEXT: subr z1.s, z1.s, #0 // =0x0
-; CHECK-NEXT: add z0.s, z2.s, z0.s
+; CHECK-NEXT: mov z2.d, z0.d
+; CHECK-NEXT: mov z3.d, z1.d
+; CHECK-NEXT: sub z1.s, z1.s, #1 // =0x1
+; CHECK-NEXT: sub z0.s, z0.s, #1 // =0x1
; CHECK-NEXT: subr z2.s, z2.s, #0 // =0x0
-; CHECK-NEXT: and z1.d, z1.d, z4.d
-; CHECK-NEXT: and z3.d, z3.d, z7.d
-; CHECK-NEXT: and z0.d, z0.d, z6.d
-; CHECK-NEXT: and z2.d, z2.d, z5.d
-; CHECK-NEXT: orr z1.d, z3.d, z1.d
-; CHECK-NEXT: orr z0.d, z0.d, z2.d
+; CHECK-NEXT: subr z3.s, z3.s, #0 // =0x0
+; CHECK-NEXT: and z2.d, z2.d, z4.d
+; CHECK-NEXT: and z3.d, z3.d, z5.d
+; CHECK-NEXT: and z4.d, z0.d, z7.d
+; CHECK-NEXT: and z0.d, z1.d, z6.d
+; CHECK-NEXT: orr z1.d, z4.d, z2.d
+; CHECK-NEXT: orr z0.d, z0.d, z3.d
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-build-vector.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-build-vector.ll
index 3a6445dd1d99b..d226fc89c3381 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-build-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-build-vector.ll
@@ -69,9 +69,9 @@ define void @build_vector_0_dec3_v8i32(ptr %a) {
; CHECK-LABEL: build_vector_0_dec3_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: index z0.s, #0, #-3
-; CHECK-NEXT: mov z1.s, #-12 // =0xfffffffffffffff4
-; CHECK-NEXT: add z1.s, z0.s, z1.s
-; CHECK-NEXT: stp q0, q1, [x0]
+; CHECK-NEXT: str q0, [x0]
+; CHECK-NEXT: sub z0.s, z0.s, #12 // =0xc
+; CHECK-NEXT: str q0, [x0, #16]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: build_vector_0_dec3_v8i32:
@@ -91,11 +91,10 @@ define void @build_vector_minus2_dec32_v4i64(ptr %a) {
; CHECK-LABEL: build_vector_minus2_dec32_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #-32 // =0xffffffffffffffe0
-; CHECK-NEXT: mov z1.d, #-66 // =0xffffffffffffffbe
-; CHECK-NEXT: mov z2.d, #-2 // =0xfffffffffffffffe
; CHECK-NEXT: index z0.d, #0, x8
-; CHECK-NEXT: add z1.d, z0.d, z1.d
-; CHECK-NEXT: add z0.d, z0.d, z2.d
+; CHECK-NEXT: mov z1.d, z0.d
+; CHECK-NEXT: sub z0.d, z0.d, #2 // =0x2
+; CHECK-NEXT: sub z1.d, z1.d, #66 // =0x42
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
diff --git a/llvm/test/CodeGen/AArch64/sve2-bsl.ll b/llvm/test/CodeGen/AArch64/sve2-bsl.ll
index 6cfe66eb8e633..91502aa479c2d 100644
--- a/llvm/test/CodeGen/AArch64/sve2-bsl.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-bsl.ll
@@ -16,9 +16,12 @@ define <vscale x 4 x i32> @bsl(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
define <vscale x 4 x i32> @bsl_add_sub(<vscale x 4 x i32> %pre_cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) #0 {
; CHECK-LABEL: bsl_add_sub:
; CHECK: // %bb.0:
-; CHECK-NEXT: subr z0.s, z0.s, #0 // =0x0
-; CHECK-NEXT: bsl z1.d, z1.d, z2.d, z0.d
-; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: mov z3.d, z0.d
+; CHECK-NEXT: sub z0.s, z0.s, #1 // =0x1
+; CHECK-NEXT: subr z3.s, z3.s, #0 // =0x0
+; CHECK-NEXT: and z0.d, z0.d, z2.d
+; CHECK-NEXT: and z1.d, z3.d, z1.d
+; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: ret
%neg_cond = sub <vscale x 4 x i32> zeroinitializer, %pre_cond
%min_cond = add <vscale x 4 x i32> %pre_cond, splat(i32 -1)
>From 5fa66ce4f953a7abc070f768d6c028d8f7b8218d Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Fri, 29 Aug 2025 10:55:46 +0100
Subject: [PATCH 2/3] Via tablegen
---
.../Target/AArch64/AArch64ISelDAGToDAG.cpp | 27 ++++++++++---------
.../Target/AArch64/AArch64ISelLowering.cpp | 9 -------
.../lib/Target/AArch64/AArch64SVEInstrInfo.td | 6 +++++
llvm/lib/Target/AArch64/SVEInstrFormats.td | 15 +++++++----
.../sve-fixed-length-addressing-modes.ll | 3 +--
.../AArch64/sve-index-const-step-vector.ll | 3 +--
llvm/test/CodeGen/AArch64/sve-int-arith.ll | 17 ++++++------
llvm/test/CodeGen/AArch64/sve-int-imm.ll | 8 +++---
.../CodeGen/AArch64/sve-intrinsics-index.ll | 16 +++++------
llvm/test/CodeGen/AArch64/sve2-bsl.ll | 9 +++----
10 files changed, 54 insertions(+), 59 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index bc786f415b554..42d1c1dd594f1 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -246,9 +246,9 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
return false;
}
- template<MVT::SimpleValueType VT>
+ template<MVT::SimpleValueType VT, bool Negate>
bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
- return SelectSVEAddSubImm(N, VT, Imm, Shift);
+ return SelectSVEAddSubImm(N, VT, Imm, Shift, Negate);
}
template <MVT::SimpleValueType VT, bool Negate>
@@ -489,7 +489,7 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
bool SelectCMP_SWAP(SDNode *N);
- bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
+ bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift, bool Negate);
bool SelectSVEAddSubSSatImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
bool Negate);
bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
@@ -4227,35 +4227,36 @@ bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
}
bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
- SDValue &Shift) {
+ SDValue &Shift, bool Negate) {
if (!isa<ConstantSDNode>(N))
return false;
SDLoc DL(N);
- uint64_t Val = cast<ConstantSDNode>(N)
- ->getAPIntValue()
- .trunc(VT.getFixedSizeInBits())
- .getZExtValue();
+ APInt Val =
+ cast<ConstantSDNode>(N)->getAPIntValue().trunc(VT.getFixedSizeInBits());
+
+ if (Negate)
+ Val = -Val;
switch (VT.SimpleTy) {
case MVT::i8:
// All immediates are supported.
Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
- Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
+ Imm = CurDAG->getTargetConstant(Val.getZExtValue(), DL, MVT::i32);
return true;
case MVT::i16:
case MVT::i32:
case MVT::i64:
// Support 8bit unsigned immediates.
- if (Val <= 255) {
+ if ((Val & ~0xff) == 0) {
Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
- Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
+ Imm = CurDAG->getTargetConstant(Val.getZExtValue(), DL, MVT::i32);
return true;
}
// Support 16bit unsigned immediates that are a multiple of 256.
- if (Val <= 65280 && Val % 256 == 0) {
+ if ((Val & ~0xff00) == 0) {
Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
- Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
+ Imm = CurDAG->getTargetConstant(Val.lshr(8).getZExtValue(), DL, MVT::i32);
return true;
}
break;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index da65c315a2d25..23328ed57fb36 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -21813,15 +21813,6 @@ static SDValue performAddSubCombine(SDNode *N,
if (SDValue Val = performExtBinopLoadFold(N, DCI.DAG))
return Val;
- APInt Imm;
- if (N->getValueType(0).isScalableVector() &&
- ISD::isConstantSplatVector(N->getOperand(1).getNode(), Imm) &&
- Imm.isNegative() && ((-Imm & ~0xff) == 0 || (-Imm & ~0xff00) == 0))
- return DCI.DAG.getNode(
- N->getOpcode() == ISD::ADD ? ISD::SUB : ISD::ADD, SDLoc(N),
- N->getValueType(0), N->getOperand(0),
- DCI.DAG.getConstant(-Imm, SDLoc(N), N->getValueType(0)));
-
return performAddSubLongCombine(N, DCI);
}
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index eeb47b4d99750..58d8dcff4f34e 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -657,6 +657,12 @@ let Predicates = [HasSVE_or_SME] in {
defm SQSUB_ZI : sve_int_arith_imm0_ssat<0b110, "sqsub", ssubsat, saddsat>;
defm UQSUB_ZI : sve_int_arith_imm0<0b111, "uqsub", usubsat>;
+ // Extra patterns for add(x, splat(-ve)) -> sub(x, +ve). There is no i8
+ // pattern as all i8 constants can be handled by an add.
+ def : SVE_1_Op_Imm_OptLsl_Pat<nxv8i16, add, ZPR16, i32, SVEAddSubNegImm16Pat, SUB_ZI_H>;
+ def : SVE_1_Op_Imm_OptLsl_Pat<nxv4i32, add, ZPR32, i32, SVEAddSubNegImm32Pat, SUB_ZI_S>;
+ def : SVE_1_Op_Imm_OptLsl_Pat<nxv2i64, add, ZPR64, i64, SVEAddSubNegImm64Pat, SUB_ZI_D>;
+
defm MAD_ZPmZZ : sve_int_mladdsub_vvv_pred<0b0, "mad", AArch64mad_m1, "MLA_ZPmZZ", /*isReverseInstr*/ 1>;
defm MSB_ZPmZZ : sve_int_mladdsub_vvv_pred<0b1, "msb", AArch64msb_m1, "MLS_ZPmZZ", /*isReverseInstr*/ 1>;
defm MLA_ZPmZZ : sve_int_mlas_vvv_pred<0b0, "mla", AArch64mla_m1, "MLA_ZPZZZ", "MAD_ZPmZZ">;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index a3a7d0f74e1bc..e7b680f9880c2 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -315,10 +315,15 @@ def addsub_imm8_opt_lsl_i16 : imm8_opt_lsl<16, "uint16_t", SVEAddSubImmOperand16
def addsub_imm8_opt_lsl_i32 : imm8_opt_lsl<32, "uint32_t", SVEAddSubImmOperand32>;
def addsub_imm8_opt_lsl_i64 : imm8_opt_lsl<64, "uint64_t", SVEAddSubImmOperand64>;
-def SVEAddSubImm8Pat : ComplexPattern<i32, 2, "SelectSVEAddSubImm<MVT::i8>", []>;
-def SVEAddSubImm16Pat : ComplexPattern<i32, 2, "SelectSVEAddSubImm<MVT::i16>", []>;
-def SVEAddSubImm32Pat : ComplexPattern<i32, 2, "SelectSVEAddSubImm<MVT::i32>", []>;
-def SVEAddSubImm64Pat : ComplexPattern<i64, 2, "SelectSVEAddSubImm<MVT::i64>", []>;
+def SVEAddSubImm8Pat : ComplexPattern<i32, 2, "SelectSVEAddSubImm<MVT::i8, false>", []>;
+def SVEAddSubImm16Pat : ComplexPattern<i32, 2, "SelectSVEAddSubImm<MVT::i16, false>", []>;
+def SVEAddSubImm32Pat : ComplexPattern<i32, 2, "SelectSVEAddSubImm<MVT::i32, false>", []>;
+def SVEAddSubImm64Pat : ComplexPattern<i64, 2, "SelectSVEAddSubImm<MVT::i64, false>", []>;
+
+def SVEAddSubNegImm8Pat : ComplexPattern<i32, 2, "SelectSVEAddSubImm<MVT::i8, true>", []>;
+def SVEAddSubNegImm16Pat : ComplexPattern<i32, 2, "SelectSVEAddSubImm<MVT::i16, true>", []>;
+def SVEAddSubNegImm32Pat : ComplexPattern<i32, 2, "SelectSVEAddSubImm<MVT::i32, true>", []>;
+def SVEAddSubNegImm64Pat : ComplexPattern<i64, 2, "SelectSVEAddSubImm<MVT::i64, true>", []>;
def SVEAddSubSSatNegImm8Pat : ComplexPattern<i32, 2, "SelectSVEAddSubSSatImm<MVT::i8, true>", []>;
def SVEAddSubSSatNegImm16Pat : ComplexPattern<i32, 2, "SelectSVEAddSubSSatImm<MVT::i16, true>", []>;
@@ -529,7 +534,7 @@ multiclass SVE_1_Op_PassthruUndef_Round_Pat<ValueType vtd, SDPatternOperator op,
class SVE_1_Op_PassthruZero_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
ValueType vt2, Instruction inst>
: Pat<(vtd (op (vtd (SVEDup0)), vt1:$Op1, vt2:$Op2)),
- (inst (IMPLICIT_DEF), $Op1, $Op2)>;
+ (inst (IMPLICIT_DEF), $Op1, $Op2)>;
class SVE_1_Op_Imm_OptLsl_Pat<ValueType vt, SDPatternOperator op, ZPRRegOp zprty,
ValueType it, ComplexPattern cpx, Instruction inst>
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-addressing-modes.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-addressing-modes.ll
index 019089812805f..ebd6f53ed4a08 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-addressing-modes.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-addressing-modes.ll
@@ -51,10 +51,9 @@ define void @masked_scatter_base_plus_stride_v8f32(ptr %dst, ptr %src) #0 {
define void @masked_scatter_base_plus_stride_v4f64(ptr %dst, ptr %src) #0 {
; CHECK-LABEL: masked_scatter_base_plus_stride_v4f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: index z0.d, #0, #3
; CHECK-NEXT: ptrue p0.d, vl4
+; CHECK-NEXT: index z0.d, #-2, #3
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
-; CHECK-NEXT: sub z0.d, z0.d, #2 // =0x2
; CHECK-NEXT: st1d { z1.d }, p0, [x0, z0.d, lsl #3]
; CHECK-NEXT: ret
%data = load <4 x double>, ptr %src, align 8
diff --git a/llvm/test/CodeGen/AArch64/sve-index-const-step-vector.ll b/llvm/test/CodeGen/AArch64/sve-index-const-step-vector.ll
index de3287587c200..433ddbd4a261b 100644
--- a/llvm/test/CodeGen/AArch64/sve-index-const-step-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-index-const-step-vector.ll
@@ -84,8 +84,7 @@ define <4 x i32> @v4i32_non_zero_non_one() #0 {
define <4 x i32> @v4i32_neg_immediates() #0 {
; CHECK-LABEL: v4i32_neg_immediates:
; CHECK: // %bb.0:
-; CHECK-NEXT: index z0.s, #0, #-2
-; CHECK-NEXT: sub z0.s, z0.s, #1 // =0x1
+; CHECK-NEXT: index z0.s, #-1, #-2
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
ret <4 x i32> <i32 -1, i32 -3, i32 -5, i32 -7>
diff --git a/llvm/test/CodeGen/AArch64/sve-int-arith.ll b/llvm/test/CodeGen/AArch64/sve-int-arith.ll
index 28fa00274496c..c59b1d430ff4f 100644
--- a/llvm/test/CodeGen/AArch64/sve-int-arith.ll
+++ b/llvm/test/CodeGen/AArch64/sve-int-arith.ll
@@ -598,9 +598,9 @@ define <vscale x 8 x i16> @muladd_i16_positiveAddend(<vscale x 8 x i16> %a, <vsc
define <vscale x 8 x i16> @muladd_i16_negativeAddend(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
; CHECK-LABEL: muladd_i16_negativeAddend:
; CHECK: // %bb.0:
+; CHECK-NEXT: mov z2.h, #-255 // =0xffffffffffffff01
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: mul z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: sub z0.h, z0.h, #255 // =0xff
+; CHECK-NEXT: mad z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT: ret
{
%1 = mul <vscale x 8 x i16> %a, %b
@@ -624,9 +624,9 @@ define <vscale x 16 x i8> @muladd_i8_positiveAddend(<vscale x 16 x i8> %a, <vsca
define <vscale x 16 x i8> @muladd_i8_negativeAddend(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
; CHECK-LABEL: muladd_i8_negativeAddend:
; CHECK: // %bb.0:
+; CHECK-NEXT: mov z2.b, #-15 // =0xfffffffffffffff1
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: mul z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: sub z0.b, z0.b, #15 // =0xf
+; CHECK-NEXT: mad z0.b, p0/m, z1.b, z2.b
; CHECK-NEXT: ret
{
%1 = mul <vscale x 16 x i8> %a, %b
@@ -707,9 +707,10 @@ define <vscale x 8 x i16> @mulsub_i16_positiveAddend(<vscale x 8 x i16> %a, <vsc
define <vscale x 8 x i16> @mulsub_i16_negativeAddend(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
; CHECK-LABEL: mulsub_i16_negativeAddend:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z2.h, #255 // =0xff
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: mad z0.h, p0/m, z1.h, z2.h
+; CHECK-NEXT: mul z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: mov z1.h, #-255 // =0xffffffffffffff01
+; CHECK-NEXT: sub z0.h, z0.h, z1.h
; CHECK-NEXT: ret
{
%1 = mul <vscale x 8 x i16> %a, %b
@@ -733,9 +734,9 @@ define <vscale x 16 x i8> @mulsub_i8_positiveAddend(<vscale x 16 x i8> %a, <vsca
define <vscale x 16 x i8> @mulsub_i8_negativeAddend(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
; CHECK-LABEL: mulsub_i8_negativeAddend:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z2.b, #15 // =0xf
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: mad z0.b, p0/m, z1.b, z2.b
+; CHECK-NEXT: mul z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: sub z0.b, z0.b, #241 // =0xf1
; CHECK-NEXT: ret
{
%1 = mul <vscale x 16 x i8> %a, %b
diff --git a/llvm/test/CodeGen/AArch64/sve-int-imm.ll b/llvm/test/CodeGen/AArch64/sve-int-imm.ll
index 1baa47af23af5..985b7b9597705 100644
--- a/llvm/test/CodeGen/AArch64/sve-int-imm.ll
+++ b/llvm/test/CodeGen/AArch64/sve-int-imm.ll
@@ -73,7 +73,7 @@ define <vscale x 2 x i64> @add_i64_high(<vscale x 2 x i64> %a) {
define <vscale x 16 x i8> @add_i8_signedness(<vscale x 16 x i8> %a) {
; CHECK-LABEL: add_i8_signedness:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub z0.b, z0.b, #1 // =0x1
+; CHECK-NEXT: add z0.b, z0.b, #255 // =0xff
; CHECK-NEXT: ret
%res = add <vscale x 16 x i8> %a, splat(i8 255)
ret <vscale x 16 x i8> %res
@@ -82,7 +82,7 @@ define <vscale x 16 x i8> @add_i8_signedness(<vscale x 16 x i8> %a) {
define <vscale x 8 x i16> @add_i16_signedness(<vscale x 8 x i16> %a) {
; CHECK-LABEL: add_i16_signedness:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub z0.h, z0.h, #256 // =0x100
+; CHECK-NEXT: add z0.h, z0.h, #65280 // =0xff00
; CHECK-NEXT: ret
%res = add <vscale x 8 x i16> %a, splat(i16 65280)
ret <vscale x 8 x i16> %res
@@ -220,7 +220,7 @@ define <vscale x 2 x i64> @sub_i64_high(<vscale x 2 x i64> %a) {
define <vscale x 16 x i8> @addnve_i8_low(<vscale x 16 x i8> %a) {
; CHECK-LABEL: addnve_i8_low:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub z0.b, z0.b, #30 // =0x1e
+; CHECK-NEXT: add z0.b, z0.b, #226 // =0xe2
; CHECK-NEXT: ret
%res = add <vscale x 16 x i8> %a, splat(i8 -30)
ret <vscale x 16 x i8> %res
@@ -238,7 +238,7 @@ define <vscale x 8 x i16> @addnve_i16_low(<vscale x 8 x i16> %a) {
define <vscale x 8 x i16> @addnve_i16_high(<vscale x 8 x i16> %a) {
; CHECK-LABEL: addnve_i16_high:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub z0.h, z0.h, #1024 // =0x400
+; CHECK-NEXT: add z0.h, z0.h, #64512 // =0xfc00
; CHECK-NEXT: ret
%res = add <vscale x 8 x i16> %a, splat(i16 -1024)
ret <vscale x 8 x i16> %res
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-index.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-index.ll
index 0d4feced41181..157f63381a4a3 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-index.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-index.ll
@@ -8,8 +8,7 @@
define <vscale x 16 x i8> @index_ii_i8() {
; CHECK-LABEL: index_ii_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: index z0.b, #0, #15
-; CHECK-NEXT: sub z0.b, z0.b, #16 // =0x10
+; CHECK-NEXT: index z0.b, #-16, #15
; CHECK-NEXT: ret
%out = call <vscale x 16 x i8> @llvm.aarch64.sve.index.nxv16i8(i8 -16, i8 15)
ret <vscale x 16 x i8> %out
@@ -27,8 +26,7 @@ define <vscale x 8 x i16> @index_ii_i16() {
define <vscale x 4 x i32> @index_ii_i32() {
; CHECK-LABEL: index_ii_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: index z0.s, #0, #15
-; CHECK-NEXT: sub z0.s, z0.s, #16 // =0x10
+; CHECK-NEXT: index z0.s, #-16, #15
; CHECK-NEXT: ret
%out = call <vscale x 4 x i32> @llvm.aarch64.sve.index.nxv4i32(i32 -16, i32 15)
ret <vscale x 4 x i32> %out
@@ -82,8 +80,7 @@ define <vscale x 16 x i8> @index_ir_i8(i8 %a) {
define <vscale x 8 x i16> @index_ir_i16(i16 %a) {
; CHECK-LABEL: index_ir_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: index z0.h, #0, w0
-; CHECK-NEXT: sub z0.h, z0.h, #16 // =0x10
+; CHECK-NEXT: index z0.h, #-16, w0
; CHECK-NEXT: ret
%out = call <vscale x 8 x i16> @llvm.aarch64.sve.index.nxv8i16(i16 -16, i16 %a)
ret <vscale x 8 x i16> %out
@@ -101,8 +98,7 @@ define <vscale x 4 x i32> @index_ir_i32(i32 %a) {
define <vscale x 2 x i64> @index_ir_i64(i64 %a) {
; CHECK-LABEL: index_ir_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: index z0.d, #0, x0
-; CHECK-NEXT: sub z0.d, z0.d, #16 // =0x10
+; CHECK-NEXT: index z0.d, #-16, x0
; CHECK-NEXT: ret
%out = call <vscale x 2 x i64> @llvm.aarch64.sve.index.nxv2i64(i64 -16, i64 %a)
ret <vscale x 2 x i64> %out
@@ -111,8 +107,8 @@ define <vscale x 2 x i64> @index_ir_i64(i64 %a) {
define <vscale x 4 x i32> @index_ir_range(i32 %a) {
; CHECK-LABEL: index_ir_range:
; CHECK: // %bb.0:
-; CHECK-NEXT: index z0.s, #0, w0
-; CHECK-NEXT: sub z0.s, z0.s, #17 // =0x11
+; CHECK-NEXT: mov w8, #-17 // =0xffffffef
+; CHECK-NEXT: index z0.s, w8, w0
; CHECK-NEXT: ret
%out = call <vscale x 4 x i32> @llvm.aarch64.sve.index.nxv4i32(i32 -17, i32 %a)
ret <vscale x 4 x i32> %out
diff --git a/llvm/test/CodeGen/AArch64/sve2-bsl.ll b/llvm/test/CodeGen/AArch64/sve2-bsl.ll
index 91502aa479c2d..6cfe66eb8e633 100644
--- a/llvm/test/CodeGen/AArch64/sve2-bsl.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-bsl.ll
@@ -16,12 +16,9 @@ define <vscale x 4 x i32> @bsl(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
define <vscale x 4 x i32> @bsl_add_sub(<vscale x 4 x i32> %pre_cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) #0 {
; CHECK-LABEL: bsl_add_sub:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z3.d, z0.d
-; CHECK-NEXT: sub z0.s, z0.s, #1 // =0x1
-; CHECK-NEXT: subr z3.s, z3.s, #0 // =0x0
-; CHECK-NEXT: and z0.d, z0.d, z2.d
-; CHECK-NEXT: and z1.d, z3.d, z1.d
-; CHECK-NEXT: orr z0.d, z0.d, z1.d
+; CHECK-NEXT: subr z0.s, z0.s, #0 // =0x0
+; CHECK-NEXT: bsl z1.d, z1.d, z2.d, z0.d
+; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
%neg_cond = sub <vscale x 4 x i32> zeroinitializer, %pre_cond
%min_cond = add <vscale x 4 x i32> %pre_cond, splat(i32 -1)
>From f3bddf8549c800d7f5ff2c72e62ce21c8ef66e3f Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Fri, 29 Aug 2025 14:27:15 +0100
Subject: [PATCH 3/3] Add a Complexity to ensure index is selected.
---
llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 5 +++--
llvm/lib/Target/AArch64/SVEInstrFormats.td | 4 +++-
.../CodeGen/AArch64/sve-fixed-length-addressing-modes.ll | 3 +--
.../test/CodeGen/AArch64/sve-fixed-length-build-vector.ll | 3 +--
llvm/test/CodeGen/AArch64/sve-index-const-step-vector.ll | 4 ++--
llvm/test/CodeGen/AArch64/sve-intrinsics-index.ll | 8 ++++----
6 files changed, 14 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 42d1c1dd594f1..6fdc981fc21a5 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -246,7 +246,7 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
return false;
}
- template<MVT::SimpleValueType VT, bool Negate>
+ template <MVT::SimpleValueType VT, bool Negate>
bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
return SelectSVEAddSubImm(N, VT, Imm, Shift, Negate);
}
@@ -489,7 +489,8 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
bool SelectCMP_SWAP(SDNode *N);
- bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift, bool Negate);
+ bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
+ bool Negate);
bool SelectSVEAddSubSSatImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
bool Negate);
bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index e7b680f9880c2..7389f90457234 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -315,6 +315,7 @@ def addsub_imm8_opt_lsl_i16 : imm8_opt_lsl<16, "uint16_t", SVEAddSubImmOperand16
def addsub_imm8_opt_lsl_i32 : imm8_opt_lsl<32, "uint32_t", SVEAddSubImmOperand32>;
def addsub_imm8_opt_lsl_i64 : imm8_opt_lsl<64, "uint64_t", SVEAddSubImmOperand64>;
+let Complexity = 1 in {
def SVEAddSubImm8Pat : ComplexPattern<i32, 2, "SelectSVEAddSubImm<MVT::i8, false>", []>;
def SVEAddSubImm16Pat : ComplexPattern<i32, 2, "SelectSVEAddSubImm<MVT::i16, false>", []>;
def SVEAddSubImm32Pat : ComplexPattern<i32, 2, "SelectSVEAddSubImm<MVT::i32, false>", []>;
@@ -334,6 +335,7 @@ def SVEAddSubSSatPosImm8Pat : ComplexPattern<i32, 2, "SelectSVEAddSubSSatImm<MV
def SVEAddSubSSatPosImm16Pat : ComplexPattern<i32, 2, "SelectSVEAddSubSSatImm<MVT::i16, false>", []>;
def SVEAddSubSSatPosImm32Pat : ComplexPattern<i32, 2, "SelectSVEAddSubSSatImm<MVT::i32, false>", []>;
def SVEAddSubSSatPosImm64Pat : ComplexPattern<i64, 2, "SelectSVEAddSubSSatImm<MVT::i64, false>", []>;
+} // Complexity = 1
def SVECpyDupImm8Pat : ComplexPattern<i32, 2, "SelectSVECpyDupImm<MVT::i8>", []>;
def SVECpyDupImm16Pat : ComplexPattern<i32, 2, "SelectSVECpyDupImm<MVT::i16>", []>;
@@ -534,7 +536,7 @@ multiclass SVE_1_Op_PassthruUndef_Round_Pat<ValueType vtd, SDPatternOperator op,
class SVE_1_Op_PassthruZero_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
ValueType vt2, Instruction inst>
: Pat<(vtd (op (vtd (SVEDup0)), vt1:$Op1, vt2:$Op2)),
- (inst (IMPLICIT_DEF), $Op1, $Op2)>;
+ (inst (IMPLICIT_DEF), $Op1, $Op2)>;
class SVE_1_Op_Imm_OptLsl_Pat<ValueType vt, SDPatternOperator op, ZPRRegOp zprty,
ValueType it, ComplexPattern cpx, Instruction inst>
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-addressing-modes.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-addressing-modes.ll
index ebd6f53ed4a08..7ccf899c70e31 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-addressing-modes.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-addressing-modes.ll
@@ -23,8 +23,7 @@ define void @masked_gather_base_plus_stride_v4f64(ptr %dst, ptr %src) #0 {
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #-32 // =0xffffffffffffffe0
; CHECK-NEXT: ptrue p0.d, vl4
-; CHECK-NEXT: index z0.d, #0, x8
-; CHECK-NEXT: sub z0.d, z0.d, #2 // =0x2
+; CHECK-NEXT: index z0.d, #-2, x8
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x1, z0.d, lsl #3]
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-build-vector.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-build-vector.ll
index 0a918a8160137..47fda39d84001 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-build-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-build-vector.ll
@@ -44,8 +44,7 @@ define void @build_vector_minus2_dec32_v4i64(ptr %a) #0 {
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #-32 // =0xffffffffffffffe0
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
-; VBITS_GE_256-NEXT: index z0.d, #0, x8
-; VBITS_GE_256-NEXT: sub z0.d, z0.d, #2 // =0x2
+; VBITS_GE_256-NEXT: index z0.d, #-2, x8
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_256-NEXT: ret
store <4 x i64> <i64 -2, i64 -34, i64 -66, i64 -98>, ptr %a, align 8
diff --git a/llvm/test/CodeGen/AArch64/sve-index-const-step-vector.ll b/llvm/test/CodeGen/AArch64/sve-index-const-step-vector.ll
index 433ddbd4a261b..cf2ae02c14b18 100644
--- a/llvm/test/CodeGen/AArch64/sve-index-const-step-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-index-const-step-vector.ll
@@ -94,8 +94,8 @@ define <4 x i32> @v4i32_neg_immediates() #0 {
define <4 x i32> @v4i32_out_range_start() #0 {
; CHECK-LABEL: v4i32_out_range_start:
; CHECK: // %bb.0:
-; CHECK-NEXT: index z0.s, #0, #1
-; CHECK-NEXT: add z0.s, z0.s, #16 // =0x10
+; CHECK-NEXT: mov w8, #16 // =0x10
+; CHECK-NEXT: index z0.s, w8, #1
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
ret <4 x i32> <i32 16, i32 17, i32 18, i32 19>
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-index.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-index.ll
index 157f63381a4a3..4d4b1b67bbafc 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-index.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-index.ll
@@ -45,8 +45,8 @@ define <vscale x 2 x i64> @index_ii_range() {
; CHECK-LABEL: index_ii_range:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #16 // =0x10
-; CHECK-NEXT: index z0.d, #0, x8
-; CHECK-NEXT: sub z0.d, z0.d, #17 // =0x11
+; CHECK-NEXT: mov x9, #-17 // =0xffffffffffffffef
+; CHECK-NEXT: index z0.d, x9, x8
; CHECK-NEXT: ret
%out = call <vscale x 2 x i64> @llvm.aarch64.sve.index.nxv2i64(i64 -17, i64 16)
ret <vscale x 2 x i64> %out
@@ -60,7 +60,7 @@ define <vscale x 8 x i16> @index_ii_range_combine(i16 %a) {
; CHECK-NEXT: ret
%val2 = call <vscale x 8 x i16> @llvm.aarch64.sve.index.nxv8i16(i16 0, i16 2)
%val3 = shl <vscale x 8 x i16> %val2, splat(i16 2)
- %out = add <vscale x 8 x i16> %val3, splat(i16 2)
+ %out = add <vscale x 8 x i16> %val3, splat(i16 2)
ret <vscale x 8 x i16> %out
}
@@ -120,7 +120,7 @@ define <vscale x 4 x i32> @index_ir_range_combine(i32 %a) {
; CHECK-NEXT: index z0.s, #0, w0
; CHECK-NEXT: ret
%tmp = call <vscale x 4 x i32> @llvm.aarch64.sve.index.nxv4i32(i32 2, i32 1)
- %tmp1 = sub <vscale x 4 x i32> %tmp, splat(i32 2)
+ %tmp1 = sub <vscale x 4 x i32> %tmp, splat(i32 2)
%val2 = insertelement <vscale x 4 x i32> poison, i32 %a, i32 0
%val3 = shufflevector <vscale x 4 x i32> %val2, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
%out = mul <vscale x 4 x i32> %tmp1, %val3
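For reference, a minimal sketch (not part of the patch; the function name is illustrative) of the kind of IR these patterns target, mirroring the addnve_* tests in sve-int-imm.ll above. Assuming an llc invocation along the lines of those tests (e.g. llc -mtriple=aarch64 -mattr=+sve), the unpredicated add of a negative splat is expected to select as a sub of the corresponding positive immediate:

define <vscale x 4 x i32> @add_neg_imm(<vscale x 4 x i32> %a) {
  ; -30 as a 32-bit value (0xffffffe2) does not fit the unsigned 8-bit
  ; add immediate, but -(-30) = 30 fits the sub immediate, so this is
  ; expected to become: sub z0.s, z0.s, #30
  %r = add <vscale x 4 x i32> %a, splat(i32 -30)
  ret <vscale x 4 x i32> %r
}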