[llvm] [LLVM][SVE][CodeGen] Fix incorrect isel for signed saturating instructions. (PR #88136)

Paul Walker via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 11 05:07:40 PDT 2024


https://github.com/paulwalker-arm updated https://github.com/llvm/llvm-project/pull/88136

>From 284e314cefb650cddb3d226cb283bd032dc715f0 Mon Sep 17 00:00:00 2001
From: Paul Walker <paul.walker at arm.com>
Date: Tue, 9 Apr 2024 14:53:23 +0000
Subject: [PATCH 1/3] Add tests to highlight signed saturating add/sub bug.

---
 .../AArch64/sve-intrinsics-int-arith-imm.ll   | 110 ++++++++++++++++++
 1 file changed, 110 insertions(+)

diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll
index c70006d988c19b..83db28a99ce55b 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll
@@ -1059,6 +1059,19 @@ define <vscale x 16 x i8> @sqadd_b_lowimm(<vscale x 16 x i8> %a) {
   ret <vscale x 16 x i8> %out
 }
 
+; Immediate instruction form only supports positive values.
+define <vscale x 16 x i8> @sqadd_b_negimm(<vscale x 16 x i8> %a) {
+; CHECK-LABEL: sqadd_b_negimm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqadd z0.b, z0.b, #255 // =0xff
+; CHECK-NEXT:    ret
+  %elt = insertelement <vscale x 16 x i8> undef, i8 -1, i32 0
+  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqadd.x.nxv16i8(<vscale x 16 x i8> %a,
+                                                                   <vscale x 16 x i8> %splat)
+  ret <vscale x 16 x i8> %out
+}
+
 define <vscale x 8 x i16> @sqadd_h_lowimm(<vscale x 8 x i16> %a) {
 ; CHECK-LABEL: sqadd_h_lowimm:
 ; CHECK:       // %bb.0:
@@ -1083,6 +1096,20 @@ define <vscale x 8 x i16> @sqadd_h_highimm(<vscale x 8 x i16> %a) {
   ret <vscale x 8 x i16> %out
 }
 
+; Immediate instruction form only supports positive values.
+define <vscale x 8 x i16> @sqadd_h_negimm(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: sqadd_h_negimm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.h, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    sqadd z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
+  %elt = insertelement <vscale x 8 x i16> undef, i16 -1, i32 0
+  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqadd.x.nxv8i16(<vscale x 8 x i16> %a,
+                                                                   <vscale x 8 x i16> %splat)
+  ret <vscale x 8 x i16> %out
+}
+
 define <vscale x 4 x i32> @sqadd_s_lowimm(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: sqadd_s_lowimm:
 ; CHECK:       // %bb.0:
@@ -1107,6 +1134,20 @@ define <vscale x 4 x i32> @sqadd_s_highimm(<vscale x 4 x i32> %a) {
   ret <vscale x 4 x i32> %out
 }
 
+; Immediate instruction form only supports positive values.
+define <vscale x 4 x i32> @sqadd_s_negimm(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: sqadd_s_negimm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.s, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    sqadd z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %elt = insertelement <vscale x 4 x i32> undef, i32 -1, i32 0
+  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqadd.x.nxv4i32(<vscale x 4 x i32> %a,
+                                                                   <vscale x 4 x i32> %splat)
+  ret <vscale x 4 x i32> %out
+}
+
 define <vscale x 2 x i64> @sqadd_d_lowimm(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: sqadd_d_lowimm:
 ; CHECK:       // %bb.0:
@@ -1131,6 +1172,20 @@ define <vscale x 2 x i64> @sqadd_d_highimm(<vscale x 2 x i64> %a) {
   ret <vscale x 2 x i64> %out
 }
 
+; Immediate instruction form only supports positive values.
+define <vscale x 2 x i64> @sqadd_d_negimm(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: sqadd_d_negimm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.d, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    sqadd z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %elt = insertelement <vscale x 2 x i64> undef, i64 -1, i32 0
+  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqadd.x.nxv2i64(<vscale x 2 x i64> %a,
+                                                                   <vscale x 2 x i64> %splat)
+  ret <vscale x 2 x i64> %out
+}
+
 ; SQSUB
 
 define <vscale x 16 x i8> @sqsub_b_lowimm(<vscale x 16 x i8> %a) {
@@ -1145,6 +1200,19 @@ define <vscale x 16 x i8> @sqsub_b_lowimm(<vscale x 16 x i8> %a) {
   ret <vscale x 16 x i8> %out
 }
 
+; Immediate instruction form only supports positive values.
+define <vscale x 16 x i8> @sqsub_b_negimm(<vscale x 16 x i8> %a) {
+; CHECK-LABEL: sqsub_b_negimm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqsub z0.b, z0.b, #255 // =0xff
+; CHECK-NEXT:    ret
+  %elt = insertelement <vscale x 16 x i8> undef, i8 -1, i32 0
+  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.x.nxv16i8(<vscale x 16 x i8> %a,
+                                                                   <vscale x 16 x i8> %splat)
+  ret <vscale x 16 x i8> %out
+}
+
 define <vscale x 8 x i16> @sqsub_h_lowimm(<vscale x 8 x i16> %a) {
 ; CHECK-LABEL: sqsub_h_lowimm:
 ; CHECK:       // %bb.0:
@@ -1169,6 +1237,20 @@ define <vscale x 8 x i16> @sqsub_h_highimm(<vscale x 8 x i16> %a) {
   ret <vscale x 8 x i16> %out
 }
 
+; Immediate instruction form only supports positive values.
+define <vscale x 8 x i16> @sqsub_h_negimm(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: sqsub_h_negimm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.h, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    sqsub z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
+  %elt = insertelement <vscale x 8 x i16> undef, i16 -1, i32 0
+  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.x.nxv8i16(<vscale x 8 x i16> %a,
+                                                                   <vscale x 8 x i16> %splat)
+  ret <vscale x 8 x i16> %out
+}
+
 define <vscale x 4 x i32> @sqsub_s_lowimm(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: sqsub_s_lowimm:
 ; CHECK:       // %bb.0:
@@ -1193,6 +1275,20 @@ define <vscale x 4 x i32> @sqsub_s_highimm(<vscale x 4 x i32> %a) {
   ret <vscale x 4 x i32> %out
 }
 
+; Immediate instruction form only supports positive values.
+define <vscale x 4 x i32> @sqsub_s_negimm(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: sqsub_s_negimm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.s, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    sqsub z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %elt = insertelement <vscale x 4 x i32> undef, i32 -1, i32 0
+  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.x.nxv4i32(<vscale x 4 x i32> %a,
+                                                                   <vscale x 4 x i32> %splat)
+  ret <vscale x 4 x i32> %out
+}
+
 define <vscale x 2 x i64> @sqsub_d_lowimm(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: sqsub_d_lowimm:
 ; CHECK:       // %bb.0:
@@ -1217,6 +1313,20 @@ define <vscale x 2 x i64> @sqsub_d_highimm(<vscale x 2 x i64> %a) {
   ret <vscale x 2 x i64> %out
 }
 
+; Immediate instruction form only supports positive values.
+define <vscale x 2 x i64> @sqsub_d_negimm(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: sqsub_d_negimm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.d, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    sqsub z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %elt = insertelement <vscale x 2 x i64> undef, i64 -1, i32 0
+  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.x.nxv2i64(<vscale x 2 x i64> %a,
+                                                                   <vscale x 2 x i64> %splat)
+  ret <vscale x 2 x i64> %out
+}
+
 ; UQADD
 
 define <vscale x 16 x i8> @uqadd_b_lowimm(<vscale x 16 x i8> %a) {

>From f6611d508a5a25174a7afab9bf4522abcb1e2bba Mon Sep 17 00:00:00 2001
From: Paul Walker <paul.walker at arm.com>
Date: Fri, 5 Apr 2024 15:43:53 +0100
Subject: [PATCH 2/3] [LLVM][SVE][CodeGen] Fix incorrect isel for signed
 saturating instructions.

The immediate forms of SQADD and SQSUB interpret their immediate
operand as unsigned and thus effectively only support positive
immediate operands.

The original code is only wrong for the i8 cases because they
previously accepted all values, however, I've also added tests for
the larger element types just in case.
---
 .../Target/AArch64/AArch64ISelDAGToDAG.cpp    | 50 +++++++++++++++++++
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td |  4 +-
 llvm/lib/Target/AArch64/SVEInstrFormats.td    | 17 +++++++
 .../AArch64/sve-intrinsics-int-arith-imm.ll   |  6 ++-
 4 files changed, 73 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 51bec3604026b0..7ef0bb9230aac5 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -258,6 +258,11 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
     return SelectSVEAddSubImm(N, VT, Imm, Shift);
   }
 
+  template <MVT::SimpleValueType VT>
+  bool SelectSVEAddSubSSatImm(SDValue N, SDValue &Imm, SDValue &Shift) {
+    return SelectSVEAddSubSSatImm(N, VT, Imm, Shift);
+  }
+
   template <MVT::SimpleValueType VT>
   bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) {
     return SelectSVECpyDupImm(N, VT, Imm, Shift);
@@ -484,6 +489,7 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
   bool SelectCMP_SWAP(SDNode *N);
 
   bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
+  bool SelectSVEAddSubSSatImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
   bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
   bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);
 
@@ -4014,6 +4020,50 @@ bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
   return false;
 }
 
+bool AArch64DAGToDAGISel::SelectSVEAddSubSSatImm(SDValue N, MVT VT,
+                                                 SDValue &Imm, SDValue &Shift) {
+  if (!isa<ConstantSDNode>(N))
+    return false;
+
+  SDLoc DL(N);
+  int64_t Val = cast<ConstantSDNode>(N)
+                    ->getAPIntValue()
+                    .trunc(VT.getFixedSizeInBits())
+                    .getSExtValue();
+
+  // Signed saturating instructions treat their immediate operand as unsigned.
+  if (Val < 0)
+    return false;
+
+  switch (VT.SimpleTy) {
+  case MVT::i8:
+    // All positive immediates are supported.
+    Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
+    Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
+    return true;
+  case MVT::i16:
+  case MVT::i32:
+  case MVT::i64:
+    // Support 8bit positive immediates.
+    if (Val <= 255) {
+      Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
+      Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
+      return true;
+    }
+    // Support 16bit positive immediates that are a multiple of 256.
+    if (Val <= 65280 && Val % 256 == 0) {
+      Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
+      Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
+      return true;
+    }
+    break;
+  default:
+    break;
+  }
+
+  return false;
+}
+
 bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm,
                                              SDValue &Shift) {
   if (!isa<ConstantSDNode>(N))
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index a519d81362a73a..b2ced36caa9733 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -554,9 +554,9 @@ let Predicates = [HasSVEorSME] in {
   defm ADD_ZI   : sve_int_arith_imm0<0b000, "add", add>;
   defm SUB_ZI   : sve_int_arith_imm0<0b001, "sub", sub>;
   defm SUBR_ZI  : sve_int_arith_imm0<0b011, "subr", AArch64subr>;
-  defm SQADD_ZI : sve_int_arith_imm0<0b100, "sqadd", saddsat>;
+  defm SQADD_ZI : sve_int_arith_imm0_ssat<0b100, "sqadd", saddsat>;
   defm UQADD_ZI : sve_int_arith_imm0<0b101, "uqadd", uaddsat>;
-  defm SQSUB_ZI : sve_int_arith_imm0<0b110, "sqsub", ssubsat>;
+  defm SQSUB_ZI : sve_int_arith_imm0_ssat<0b110, "sqsub", ssubsat>;
   defm UQSUB_ZI : sve_int_arith_imm0<0b111, "uqsub", usubsat>;
 
   defm MAD_ZPmZZ : sve_int_mladdsub_vvv_pred<0b0, "mad", AArch64mad_m1, "MLA_ZPmZZ", /*isReverseInstr*/ 1>;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index ee8292fdd8839a..1daea6f7add1b9 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -249,6 +249,11 @@ def SVEAddSubImm16Pat : ComplexPattern<i32, 2, "SelectSVEAddSubImm<MVT::i16>", [
 def SVEAddSubImm32Pat : ComplexPattern<i32, 2, "SelectSVEAddSubImm<MVT::i32>", []>;
 def SVEAddSubImm64Pat : ComplexPattern<i64, 2, "SelectSVEAddSubImm<MVT::i64>", []>;
 
+def SVEAddSubSSatImm8Pat  : ComplexPattern<i32, 2, "SelectSVEAddSubSSatImm<MVT::i8>", []>;
+def SVEAddSubSSatImm16Pat : ComplexPattern<i32, 2, "SelectSVEAddSubSSatImm<MVT::i16>", []>;
+def SVEAddSubSSatImm32Pat : ComplexPattern<i32, 2, "SelectSVEAddSubSSatImm<MVT::i32>", []>;
+def SVEAddSubSSatImm64Pat : ComplexPattern<i64, 2, "SelectSVEAddSubSSatImm<MVT::i64>", []>;
+
 def SVECpyDupImm8Pat  : ComplexPattern<i32, 2, "SelectSVECpyDupImm<MVT::i8>", []>;
 def SVECpyDupImm16Pat : ComplexPattern<i32, 2, "SelectSVECpyDupImm<MVT::i16>", []>;
 def SVECpyDupImm32Pat : ComplexPattern<i32, 2, "SelectSVECpyDupImm<MVT::i32>", []>;
@@ -4775,6 +4780,18 @@ multiclass sve_int_arith_imm0<bits<3> opc, string asm, SDPatternOperator op> {
   def : SVE_1_Op_Imm_OptLsl_Pat<nxv2i64, op, ZPR64, i64, SVEAddSubImm64Pat, !cast<Instruction>(NAME # _D)>;
 }
 
+multiclass sve_int_arith_imm0_ssat<bits<3> opc, string asm, SDPatternOperator op> {
+  def _B : sve_int_arith_imm0<0b00, opc, asm, ZPR8,  addsub_imm8_opt_lsl_i8>;
+  def _H : sve_int_arith_imm0<0b01, opc, asm, ZPR16, addsub_imm8_opt_lsl_i16>;
+  def _S : sve_int_arith_imm0<0b10, opc, asm, ZPR32, addsub_imm8_opt_lsl_i32>;
+  def _D : sve_int_arith_imm0<0b11, opc, asm, ZPR64, addsub_imm8_opt_lsl_i64>;
+
+  def : SVE_1_Op_Imm_OptLsl_Pat<nxv16i8, op, ZPR8,  i32, SVEAddSubSSatImm8Pat,  !cast<Instruction>(NAME # _B)>;
+  def : SVE_1_Op_Imm_OptLsl_Pat<nxv8i16, op, ZPR16, i32, SVEAddSubSSatImm16Pat, !cast<Instruction>(NAME # _H)>;
+  def : SVE_1_Op_Imm_OptLsl_Pat<nxv4i32, op, ZPR32, i32, SVEAddSubSSatImm32Pat, !cast<Instruction>(NAME # _S)>;
+  def : SVE_1_Op_Imm_OptLsl_Pat<nxv2i64, op, ZPR64, i64, SVEAddSubSSatImm64Pat, !cast<Instruction>(NAME # _D)>;
+}
+
 class sve_int_arith_imm<bits<2> sz8_64, bits<6> opc, string asm,
                         ZPRRegOp zprty, Operand immtype>
 : I<(outs zprty:$Zdn), (ins zprty:$_Zdn, immtype:$imm),
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll
index 83db28a99ce55b..0813aa1d50df16 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll
@@ -1063,7 +1063,8 @@ define <vscale x 16 x i8> @sqadd_b_lowimm(<vscale x 16 x i8> %a) {
 define <vscale x 16 x i8> @sqadd_b_negimm(<vscale x 16 x i8> %a) {
 ; CHECK-LABEL: sqadd_b_negimm:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sqadd z0.b, z0.b, #255 // =0xff
+; CHECK-NEXT:    mov z1.b, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    sqadd z0.b, z0.b, z1.b
 ; CHECK-NEXT:    ret
   %elt = insertelement <vscale x 16 x i8> undef, i8 -1, i32 0
   %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
@@ -1204,7 +1205,8 @@ define <vscale x 16 x i8> @sqsub_b_lowimm(<vscale x 16 x i8> %a) {
 define <vscale x 16 x i8> @sqsub_b_negimm(<vscale x 16 x i8> %a) {
 ; CHECK-LABEL: sqsub_b_negimm:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sqsub z0.b, z0.b, #255 // =0xff
+; CHECK-NEXT:    mov z1.b, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    sqsub z0.b, z0.b, z1.b
 ; CHECK-NEXT:    ret
   %elt = insertelement <vscale x 16 x i8> undef, i8 -1, i32 0
   %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer

>From c2bae3e84d6a1631f90e470ca4f9ecf4ed53fe3c Mon Sep 17 00:00:00 2001
From: Paul Walker <paul.walker at arm.com>
Date: Thu, 11 Apr 2024 13:03:45 +0100
Subject: [PATCH 3/3] Add patterns to negate unsupported immediate and invert
 the operation.

---
 .../Target/AArch64/AArch64ISelDAGToDAG.cpp    | 13 ++++---
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td |  4 +--
 llvm/lib/Target/AArch64/SVEInstrFormats.td    | 29 ++++++++++-----
 .../AArch64/sve-intrinsics-int-arith-imm.ll   | 36 ++++++++-----------
 4 files changed, 45 insertions(+), 37 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 7ef0bb9230aac5..db7cc2725f7820 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -258,9 +258,9 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
     return SelectSVEAddSubImm(N, VT, Imm, Shift);
   }
 
-  template <MVT::SimpleValueType VT>
+  template <MVT::SimpleValueType VT, bool Negate>
   bool SelectSVEAddSubSSatImm(SDValue N, SDValue &Imm, SDValue &Shift) {
-    return SelectSVEAddSubSSatImm(N, VT, Imm, Shift);
+    return SelectSVEAddSubSSatImm(N, VT, Imm, Shift, Negate);
   }
 
   template <MVT::SimpleValueType VT>
@@ -489,7 +489,8 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
   bool SelectCMP_SWAP(SDNode *N);
 
   bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
-  bool SelectSVEAddSubSSatImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
+  bool SelectSVEAddSubSSatImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
+                              bool Negate);
   bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
   bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);
 
@@ -4021,7 +4022,8 @@ bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
 }
 
 bool AArch64DAGToDAGISel::SelectSVEAddSubSSatImm(SDValue N, MVT VT,
-                                                 SDValue &Imm, SDValue &Shift) {
+                                                 SDValue &Imm, SDValue &Shift,
+                                                 bool Negate) {
   if (!isa<ConstantSDNode>(N))
     return false;
 
@@ -4031,6 +4033,9 @@ bool AArch64DAGToDAGISel::SelectSVEAddSubSSatImm(SDValue N, MVT VT,
                     .trunc(VT.getFixedSizeInBits())
                     .getSExtValue();
 
+  if (Negate)
+    Val = -Val;
+
   // Signed saturating instructions treat their immediate operand as unsigned.
   if (Val < 0)
     return false;
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index b2ced36caa9733..4e71aa45b5f497 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -554,9 +554,9 @@ let Predicates = [HasSVEorSME] in {
   defm ADD_ZI   : sve_int_arith_imm0<0b000, "add", add>;
   defm SUB_ZI   : sve_int_arith_imm0<0b001, "sub", sub>;
   defm SUBR_ZI  : sve_int_arith_imm0<0b011, "subr", AArch64subr>;
-  defm SQADD_ZI : sve_int_arith_imm0_ssat<0b100, "sqadd", saddsat>;
+  defm SQADD_ZI : sve_int_arith_imm0_ssat<0b100, "sqadd", saddsat, ssubsat>;
   defm UQADD_ZI : sve_int_arith_imm0<0b101, "uqadd", uaddsat>;
-  defm SQSUB_ZI : sve_int_arith_imm0_ssat<0b110, "sqsub", ssubsat>;
+  defm SQSUB_ZI : sve_int_arith_imm0_ssat<0b110, "sqsub", ssubsat, saddsat>;
   defm UQSUB_ZI : sve_int_arith_imm0<0b111, "uqsub", usubsat>;
 
   defm MAD_ZPmZZ : sve_int_mladdsub_vvv_pred<0b0, "mad", AArch64mad_m1, "MLA_ZPmZZ", /*isReverseInstr*/ 1>;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 1daea6f7add1b9..fd41791a97fc85 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -249,10 +249,15 @@ def SVEAddSubImm16Pat : ComplexPattern<i32, 2, "SelectSVEAddSubImm<MVT::i16>", [
 def SVEAddSubImm32Pat : ComplexPattern<i32, 2, "SelectSVEAddSubImm<MVT::i32>", []>;
 def SVEAddSubImm64Pat : ComplexPattern<i64, 2, "SelectSVEAddSubImm<MVT::i64>", []>;
 
-def SVEAddSubSSatImm8Pat  : ComplexPattern<i32, 2, "SelectSVEAddSubSSatImm<MVT::i8>", []>;
-def SVEAddSubSSatImm16Pat : ComplexPattern<i32, 2, "SelectSVEAddSubSSatImm<MVT::i16>", []>;
-def SVEAddSubSSatImm32Pat : ComplexPattern<i32, 2, "SelectSVEAddSubSSatImm<MVT::i32>", []>;
-def SVEAddSubSSatImm64Pat : ComplexPattern<i64, 2, "SelectSVEAddSubSSatImm<MVT::i64>", []>;
+def SVEAddSubSSatNegImm8Pat  : ComplexPattern<i32, 2, "SelectSVEAddSubSSatImm<MVT::i8, true>", []>;
+def SVEAddSubSSatNegImm16Pat : ComplexPattern<i32, 2, "SelectSVEAddSubSSatImm<MVT::i16, true>", []>;
+def SVEAddSubSSatNegImm32Pat : ComplexPattern<i32, 2, "SelectSVEAddSubSSatImm<MVT::i32, true>", []>;
+def SVEAddSubSSatNegImm64Pat : ComplexPattern<i64, 2, "SelectSVEAddSubSSatImm<MVT::i64, true>", []>;
+
+def SVEAddSubSSatPosImm8Pat  : ComplexPattern<i32, 2, "SelectSVEAddSubSSatImm<MVT::i8, false>", []>;
+def SVEAddSubSSatPosImm16Pat : ComplexPattern<i32, 2, "SelectSVEAddSubSSatImm<MVT::i16, false>", []>;
+def SVEAddSubSSatPosImm32Pat : ComplexPattern<i32, 2, "SelectSVEAddSubSSatImm<MVT::i32, false>", []>;
+def SVEAddSubSSatPosImm64Pat : ComplexPattern<i64, 2, "SelectSVEAddSubSSatImm<MVT::i64, false>", []>;
 
 def SVECpyDupImm8Pat  : ComplexPattern<i32, 2, "SelectSVECpyDupImm<MVT::i8>", []>;
 def SVECpyDupImm16Pat : ComplexPattern<i32, 2, "SelectSVECpyDupImm<MVT::i16>", []>;
@@ -4780,16 +4785,22 @@ multiclass sve_int_arith_imm0<bits<3> opc, string asm, SDPatternOperator op> {
   def : SVE_1_Op_Imm_OptLsl_Pat<nxv2i64, op, ZPR64, i64, SVEAddSubImm64Pat, !cast<Instruction>(NAME # _D)>;
 }
 
-multiclass sve_int_arith_imm0_ssat<bits<3> opc, string asm, SDPatternOperator op> {
+multiclass sve_int_arith_imm0_ssat<bits<3> opc, string asm, SDPatternOperator op,
+                                   SDPatternOperator inv_op> {
   def _B : sve_int_arith_imm0<0b00, opc, asm, ZPR8,  addsub_imm8_opt_lsl_i8>;
   def _H : sve_int_arith_imm0<0b01, opc, asm, ZPR16, addsub_imm8_opt_lsl_i16>;
   def _S : sve_int_arith_imm0<0b10, opc, asm, ZPR32, addsub_imm8_opt_lsl_i32>;
   def _D : sve_int_arith_imm0<0b11, opc, asm, ZPR64, addsub_imm8_opt_lsl_i64>;
 
-  def : SVE_1_Op_Imm_OptLsl_Pat<nxv16i8, op, ZPR8,  i32, SVEAddSubSSatImm8Pat,  !cast<Instruction>(NAME # _B)>;
-  def : SVE_1_Op_Imm_OptLsl_Pat<nxv8i16, op, ZPR16, i32, SVEAddSubSSatImm16Pat, !cast<Instruction>(NAME # _H)>;
-  def : SVE_1_Op_Imm_OptLsl_Pat<nxv4i32, op, ZPR32, i32, SVEAddSubSSatImm32Pat, !cast<Instruction>(NAME # _S)>;
-  def : SVE_1_Op_Imm_OptLsl_Pat<nxv2i64, op, ZPR64, i64, SVEAddSubSSatImm64Pat, !cast<Instruction>(NAME # _D)>;
+  def : SVE_1_Op_Imm_OptLsl_Pat<nxv16i8, op, ZPR8,  i32, SVEAddSubSSatPosImm8Pat,  !cast<Instruction>(NAME # _B)>;
+  def : SVE_1_Op_Imm_OptLsl_Pat<nxv8i16, op, ZPR16, i32, SVEAddSubSSatPosImm16Pat, !cast<Instruction>(NAME # _H)>;
+  def : SVE_1_Op_Imm_OptLsl_Pat<nxv4i32, op, ZPR32, i32, SVEAddSubSSatPosImm32Pat, !cast<Instruction>(NAME # _S)>;
+  def : SVE_1_Op_Imm_OptLsl_Pat<nxv2i64, op, ZPR64, i64, SVEAddSubSSatPosImm64Pat, !cast<Instruction>(NAME # _D)>;
+
+  def : SVE_1_Op_Imm_OptLsl_Pat<nxv16i8, inv_op, ZPR8,  i32, SVEAddSubSSatNegImm8Pat,  !cast<Instruction>(NAME # _B)>;
+  def : SVE_1_Op_Imm_OptLsl_Pat<nxv8i16, inv_op, ZPR16, i32, SVEAddSubSSatNegImm16Pat, !cast<Instruction>(NAME # _H)>;
+  def : SVE_1_Op_Imm_OptLsl_Pat<nxv4i32, inv_op, ZPR32, i32, SVEAddSubSSatNegImm32Pat, !cast<Instruction>(NAME # _S)>;
+  def : SVE_1_Op_Imm_OptLsl_Pat<nxv2i64, inv_op, ZPR64, i64, SVEAddSubSSatNegImm64Pat, !cast<Instruction>(NAME # _D)>;
 }
 
 class sve_int_arith_imm<bits<2> sz8_64, bits<6> opc, string asm,
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll
index 0813aa1d50df16..3e453a6b781794 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll
@@ -1063,10 +1063,9 @@ define <vscale x 16 x i8> @sqadd_b_lowimm(<vscale x 16 x i8> %a) {
 define <vscale x 16 x i8> @sqadd_b_negimm(<vscale x 16 x i8> %a) {
 ; CHECK-LABEL: sqadd_b_negimm:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z1.b, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    sqadd z0.b, z0.b, z1.b
+; CHECK-NEXT:    sqsub z0.b, z0.b, #128 // =0x80
 ; CHECK-NEXT:    ret
-  %elt = insertelement <vscale x 16 x i8> undef, i8 -1, i32 0
+  %elt = insertelement <vscale x 16 x i8> undef, i8 -128, i32 0
   %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqadd.x.nxv16i8(<vscale x 16 x i8> %a,
                                                                    <vscale x 16 x i8> %splat)
@@ -1101,8 +1100,7 @@ define <vscale x 8 x i16> @sqadd_h_highimm(<vscale x 8 x i16> %a) {
 define <vscale x 8 x i16> @sqadd_h_negimm(<vscale x 8 x i16> %a) {
 ; CHECK-LABEL: sqadd_h_negimm:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z1.h, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    sqadd z0.h, z0.h, z1.h
+; CHECK-NEXT:    sqsub z0.h, z0.h, #1 // =0x1
 ; CHECK-NEXT:    ret
   %elt = insertelement <vscale x 8 x i16> undef, i16 -1, i32 0
   %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
@@ -1139,10 +1137,9 @@ define <vscale x 4 x i32> @sqadd_s_highimm(<vscale x 4 x i32> %a) {
 define <vscale x 4 x i32> @sqadd_s_negimm(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: sqadd_s_negimm:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z1.s, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    sqadd z0.s, z0.s, z1.s
+; CHECK-NEXT:    sqsub z0.s, z0.s, #65280 // =0xff00
 ; CHECK-NEXT:    ret
-  %elt = insertelement <vscale x 4 x i32> undef, i32 -1, i32 0
+  %elt = insertelement <vscale x 4 x i32> undef, i32 -65280, i32 0
   %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqadd.x.nxv4i32(<vscale x 4 x i32> %a,
                                                                    <vscale x 4 x i32> %splat)
@@ -1177,10 +1174,9 @@ define <vscale x 2 x i64> @sqadd_d_highimm(<vscale x 2 x i64> %a) {
 define <vscale x 2 x i64> @sqadd_d_negimm(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: sqadd_d_negimm:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z1.d, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    sqadd z0.d, z0.d, z1.d
+; CHECK-NEXT:    sqsub z0.d, z0.d, #3840 // =0xf00
 ; CHECK-NEXT:    ret
-  %elt = insertelement <vscale x 2 x i64> undef, i64 -1, i32 0
+  %elt = insertelement <vscale x 2 x i64> undef, i64 -3840, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqadd.x.nxv2i64(<vscale x 2 x i64> %a,
                                                                    <vscale x 2 x i64> %splat)
@@ -1205,8 +1201,7 @@ define <vscale x 16 x i8> @sqsub_b_lowimm(<vscale x 16 x i8> %a) {
 define <vscale x 16 x i8> @sqsub_b_negimm(<vscale x 16 x i8> %a) {
 ; CHECK-LABEL: sqsub_b_negimm:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z1.b, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    sqsub z0.b, z0.b, z1.b
+; CHECK-NEXT:    sqadd z0.b, z0.b, #1 // =0x1
 ; CHECK-NEXT:    ret
   %elt = insertelement <vscale x 16 x i8> undef, i8 -1, i32 0
   %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
@@ -1243,10 +1238,9 @@ define <vscale x 8 x i16> @sqsub_h_highimm(<vscale x 8 x i16> %a) {
 define <vscale x 8 x i16> @sqsub_h_negimm(<vscale x 8 x i16> %a) {
 ; CHECK-LABEL: sqsub_h_negimm:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z1.h, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    sqsub z0.h, z0.h, z1.h
+; CHECK-NEXT:    sqadd z0.h, z0.h, #128 // =0x80
 ; CHECK-NEXT:    ret
-  %elt = insertelement <vscale x 8 x i16> undef, i16 -1, i32 0
+  %elt = insertelement <vscale x 8 x i16> undef, i16 -128, i32 0
   %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.x.nxv8i16(<vscale x 8 x i16> %a,
                                                                    <vscale x 8 x i16> %splat)
@@ -1281,10 +1275,9 @@ define <vscale x 4 x i32> @sqsub_s_highimm(<vscale x 4 x i32> %a) {
 define <vscale x 4 x i32> @sqsub_s_negimm(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: sqsub_s_negimm:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z1.s, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    sqsub z0.s, z0.s, z1.s
+; CHECK-NEXT:    sqadd z0.s, z0.s, #32768 // =0x8000
 ; CHECK-NEXT:    ret
-  %elt = insertelement <vscale x 4 x i32> undef, i32 -1, i32 0
+  %elt = insertelement <vscale x 4 x i32> undef, i32 -32768, i32 0
   %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.x.nxv4i32(<vscale x 4 x i32> %a,
                                                                    <vscale x 4 x i32> %splat)
@@ -1319,10 +1312,9 @@ define <vscale x 2 x i64> @sqsub_d_highimm(<vscale x 2 x i64> %a) {
 define <vscale x 2 x i64> @sqsub_d_negimm(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: sqsub_d_negimm:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z1.d, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    sqsub z0.d, z0.d, z1.d
+; CHECK-NEXT:    sqadd z0.d, z0.d, #57344 // =0xe000
 ; CHECK-NEXT:    ret
-  %elt = insertelement <vscale x 2 x i64> undef, i64 -1, i32 0
+  %elt = insertelement <vscale x 2 x i64> undef, i64 -57344, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.x.nxv2i64(<vscale x 2 x i64> %a,
                                                                    <vscale x 2 x i64> %splat)



More information about the llvm-commits mailing list