[llvm] d48ebac - [LLVM][SVE][CodeGen] Fix incorrect isel for signed saturating instructions. (#88136)

via llvm-commits <llvm-commits at lists.llvm.org>
Mon Apr 15 04:24:46 PDT 2024


Author: Paul Walker
Date: 2024-04-15T12:24:42+01:00
New Revision: d48ebac0389ba75defce5bd83d8539d51008b28d

URL: https://github.com/llvm/llvm-project/commit/d48ebac0389ba75defce5bd83d8539d51008b28d
DIFF: https://github.com/llvm/llvm-project/commit/d48ebac0389ba75defce5bd83d8539d51008b28d.diff

LOG: [LLVM][SVE][CodeGen] Fix incorrect isel for signed saturating instructions. (#88136)

The immediate forms of SQADD and SQSUB interpret their immediate
operand as unsigned and thus effectively only support positive
immediate operands.
    
The original code is only wrong for the i8 cases, because they
previously accepted all values. However, the new patterns enable more
uses, so I've added tests for the larger element types as well.
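
To illustrate the encoding rule the new patterns rely on, here is a
minimal standalone C++ sketch (it is not the in-tree ISel code, and the
names EncodedImm/encodeSSatImm are invented for illustration): the
signed splat value is optionally negated (which is what lets a
saturating add of -C be selected as a saturating subtract of C, and
vice versa), must then be non-negative, and must fit the unsigned
"imm8, optionally LSL #8" field.

  #include <cstdint>
  #include <optional>

  struct EncodedImm {
    uint64_t Imm;   // value placed in the instruction's 8-bit field
    unsigned Shift; // 0 or 8
  };

  // ElementBits is 8, 16, 32 or 64. Returns std::nullopt when the
  // immediate form cannot be used and the vector form is needed.
  std::optional<EncodedImm> encodeSSatImm(int64_t Val, unsigned ElementBits,
                                          bool Negate) {
    if (Negate)
      Val = -Val;
    // The instruction treats its immediate as unsigned, so negative
    // values cannot be encoded directly.
    if (Val < 0)
      return std::nullopt;
    if (ElementBits == 8 || Val <= 255)    // plain 8-bit immediate
      return EncodedImm{uint64_t(Val), 0};
    if (Val <= 65280 && Val % 256 == 0)    // 8-bit immediate, LSL #8
      return EncodedImm{uint64_t(Val) >> 8, 8};
    return std::nullopt;
  }

For example, with the negated path, a byte splat of -128 encodes as
immediate 128 with no shift, which is why the sqadd_b_negimm test below
selects "sqsub z0.b, z0.b, #128".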

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
    llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
    llvm/lib/Target/AArch64/SVEInstrFormats.td
    llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 51bec3604026b0..80272213dd3897 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -258,6 +258,11 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
     return SelectSVEAddSubImm(N, VT, Imm, Shift);
   }
 
+  template <MVT::SimpleValueType VT, bool Negate>
+  bool SelectSVEAddSubSSatImm(SDValue N, SDValue &Imm, SDValue &Shift) {
+    return SelectSVEAddSubSSatImm(N, VT, Imm, Shift, Negate);
+  }
+
   template <MVT::SimpleValueType VT>
   bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) {
     return SelectSVECpyDupImm(N, VT, Imm, Shift);
@@ -484,6 +489,8 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
   bool SelectCMP_SWAP(SDNode *N);
 
   bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
+  bool SelectSVEAddSubSSatImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
+                              bool Negate);
   bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
   bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);
 
@@ -4014,6 +4021,56 @@ bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
   return false;
 }
 
+bool AArch64DAGToDAGISel::SelectSVEAddSubSSatImm(SDValue N, MVT VT,
+                                                 SDValue &Imm, SDValue &Shift,
+                                                 bool Negate) {
+  if (!isa<ConstantSDNode>(N))
+    return false;
+
+  SDLoc DL(N);
+  int64_t Val = cast<ConstantSDNode>(N)
+                    ->getAPIntValue()
+                    .trunc(VT.getFixedSizeInBits())
+                    .getSExtValue();
+
+  if (Negate)
+    Val = -Val;
+
+  // Signed saturating instructions treat their immediate operand as unsigned,
+  // whereas the related intrinsics define their operands to be signed. This
+  // means we can only use the immediate form when the operand is non-negative.
+  if (Val < 0)
+    return false;
+
+  switch (VT.SimpleTy) {
+  case MVT::i8:
+    // All positive immediates are supported.
+    Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
+    Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
+    return true;
+  case MVT::i16:
+  case MVT::i32:
+  case MVT::i64:
+    // Support 8bit positive immediates.
+    if (Val <= 255) {
+      Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
+      Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
+      return true;
+    }
+    // Support 16bit positive immediates that are a multiple of 256.
+    if (Val <= 65280 && Val % 256 == 0) {
+      Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
+      Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
+      return true;
+    }
+    break;
+  default:
+    break;
+  }
+
+  return false;
+}
+
 bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm,
                                              SDValue &Shift) {
   if (!isa<ConstantSDNode>(N))

diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 9c747198c12d86..6972acd985cb9a 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -554,9 +554,9 @@ let Predicates = [HasSVEorSME] in {
   defm ADD_ZI   : sve_int_arith_imm0<0b000, "add", add>;
   defm SUB_ZI   : sve_int_arith_imm0<0b001, "sub", sub>;
   defm SUBR_ZI  : sve_int_arith_imm0<0b011, "subr", AArch64subr>;
-  defm SQADD_ZI : sve_int_arith_imm0<0b100, "sqadd", saddsat>;
+  defm SQADD_ZI : sve_int_arith_imm0_ssat<0b100, "sqadd", saddsat, ssubsat>;
   defm UQADD_ZI : sve_int_arith_imm0<0b101, "uqadd", uaddsat>;
-  defm SQSUB_ZI : sve_int_arith_imm0<0b110, "sqsub", ssubsat>;
+  defm SQSUB_ZI : sve_int_arith_imm0_ssat<0b110, "sqsub", ssubsat, saddsat>;
   defm UQSUB_ZI : sve_int_arith_imm0<0b111, "uqsub", usubsat>;
 
   defm MAD_ZPmZZ : sve_int_mladdsub_vvv_pred<0b0, "mad", AArch64mad_m1, "MLA_ZPmZZ", /*isReverseInstr*/ 1>;

diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index fb0c6188edb348..3317cf84aa6fa9 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -249,6 +249,16 @@ def SVEAddSubImm16Pat : ComplexPattern<i32, 2, "SelectSVEAddSubImm<MVT::i16>", [
 def SVEAddSubImm32Pat : ComplexPattern<i32, 2, "SelectSVEAddSubImm<MVT::i32>", []>;
 def SVEAddSubImm64Pat : ComplexPattern<i64, 2, "SelectSVEAddSubImm<MVT::i64>", []>;
 
+def SVEAddSubSSatNegImm8Pat  : ComplexPattern<i32, 2, "SelectSVEAddSubSSatImm<MVT::i8, true>", []>;
+def SVEAddSubSSatNegImm16Pat : ComplexPattern<i32, 2, "SelectSVEAddSubSSatImm<MVT::i16, true>", []>;
+def SVEAddSubSSatNegImm32Pat : ComplexPattern<i32, 2, "SelectSVEAddSubSSatImm<MVT::i32, true>", []>;
+def SVEAddSubSSatNegImm64Pat : ComplexPattern<i64, 2, "SelectSVEAddSubSSatImm<MVT::i64, true>", []>;
+
+def SVEAddSubSSatPosImm8Pat  : ComplexPattern<i32, 2, "SelectSVEAddSubSSatImm<MVT::i8, false>", []>;
+def SVEAddSubSSatPosImm16Pat : ComplexPattern<i32, 2, "SelectSVEAddSubSSatImm<MVT::i16, false>", []>;
+def SVEAddSubSSatPosImm32Pat : ComplexPattern<i32, 2, "SelectSVEAddSubSSatImm<MVT::i32, false>", []>;
+def SVEAddSubSSatPosImm64Pat : ComplexPattern<i64, 2, "SelectSVEAddSubSSatImm<MVT::i64, false>", []>;
+
 def SVECpyDupImm8Pat  : ComplexPattern<i32, 2, "SelectSVECpyDupImm<MVT::i8>", []>;
 def SVECpyDupImm16Pat : ComplexPattern<i32, 2, "SelectSVECpyDupImm<MVT::i16>", []>;
 def SVECpyDupImm32Pat : ComplexPattern<i32, 2, "SelectSVECpyDupImm<MVT::i32>", []>;
@@ -4775,6 +4785,24 @@ multiclass sve_int_arith_imm0<bits<3> opc, string asm, SDPatternOperator op> {
   def : SVE_1_Op_Imm_OptLsl_Pat<nxv2i64, op, ZPR64, i64, SVEAddSubImm64Pat, !cast<Instruction>(NAME # _D)>;
 }
 
+multiclass sve_int_arith_imm0_ssat<bits<3> opc, string asm, SDPatternOperator op,
+                                   SDPatternOperator inv_op> {
+  def _B : sve_int_arith_imm0<0b00, opc, asm, ZPR8,  addsub_imm8_opt_lsl_i8>;
+  def _H : sve_int_arith_imm0<0b01, opc, asm, ZPR16, addsub_imm8_opt_lsl_i16>;
+  def _S : sve_int_arith_imm0<0b10, opc, asm, ZPR32, addsub_imm8_opt_lsl_i32>;
+  def _D : sve_int_arith_imm0<0b11, opc, asm, ZPR64, addsub_imm8_opt_lsl_i64>;
+
+  def : SVE_1_Op_Imm_OptLsl_Pat<nxv16i8, op, ZPR8,  i32, SVEAddSubSSatPosImm8Pat,  !cast<Instruction>(NAME # _B)>;
+  def : SVE_1_Op_Imm_OptLsl_Pat<nxv8i16, op, ZPR16, i32, SVEAddSubSSatPosImm16Pat, !cast<Instruction>(NAME # _H)>;
+  def : SVE_1_Op_Imm_OptLsl_Pat<nxv4i32, op, ZPR32, i32, SVEAddSubSSatPosImm32Pat, !cast<Instruction>(NAME # _S)>;
+  def : SVE_1_Op_Imm_OptLsl_Pat<nxv2i64, op, ZPR64, i64, SVEAddSubSSatPosImm64Pat, !cast<Instruction>(NAME # _D)>;
+
+  def : SVE_1_Op_Imm_OptLsl_Pat<nxv16i8, inv_op, ZPR8,  i32, SVEAddSubSSatNegImm8Pat,  !cast<Instruction>(NAME # _B)>;
+  def : SVE_1_Op_Imm_OptLsl_Pat<nxv8i16, inv_op, ZPR16, i32, SVEAddSubSSatNegImm16Pat, !cast<Instruction>(NAME # _H)>;
+  def : SVE_1_Op_Imm_OptLsl_Pat<nxv4i32, inv_op, ZPR32, i32, SVEAddSubSSatNegImm32Pat, !cast<Instruction>(NAME # _S)>;
+  def : SVE_1_Op_Imm_OptLsl_Pat<nxv2i64, inv_op, ZPR64, i64, SVEAddSubSSatNegImm64Pat, !cast<Instruction>(NAME # _D)>;
+}
+
 class sve_int_arith_imm<bits<2> sz8_64, bits<6> opc, string asm,
                         ZPRRegOp zprty, Operand immtype>
 : I<(outs zprty:$Zdn), (ins zprty:$_Zdn, immtype:$imm),

diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll
index c70006d988c19b..3e453a6b781794 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll
@@ -1059,6 +1059,19 @@ define <vscale x 16 x i8> @sqadd_b_lowimm(<vscale x 16 x i8> %a) {
   ret <vscale x 16 x i8> %out
 }
 
+; Immediate instruction form only supports positive values.
+define <vscale x 16 x i8> @sqadd_b_negimm(<vscale x 16 x i8> %a) {
+; CHECK-LABEL: sqadd_b_negimm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqsub z0.b, z0.b, #128 // =0x80
+; CHECK-NEXT:    ret
+  %elt = insertelement <vscale x 16 x i8> undef, i8 -128, i32 0
+  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqadd.x.nxv16i8(<vscale x 16 x i8> %a,
+                                                                   <vscale x 16 x i8> %splat)
+  ret <vscale x 16 x i8> %out
+}
+
 define <vscale x 8 x i16> @sqadd_h_lowimm(<vscale x 8 x i16> %a) {
 ; CHECK-LABEL: sqadd_h_lowimm:
 ; CHECK:       // %bb.0:
@@ -1083,6 +1096,19 @@ define <vscale x 8 x i16> @sqadd_h_highimm(<vscale x 8 x i16> %a) {
   ret <vscale x 8 x i16> %out
 }
 
+; Immediate instruction form only supports positive values.
+define <vscale x 8 x i16> @sqadd_h_negimm(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: sqadd_h_negimm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqsub z0.h, z0.h, #1 // =0x1
+; CHECK-NEXT:    ret
+  %elt = insertelement <vscale x 8 x i16> undef, i16 -1, i32 0
+  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqadd.x.nxv8i16(<vscale x 8 x i16> %a,
+                                                                   <vscale x 8 x i16> %splat)
+  ret <vscale x 8 x i16> %out
+}
+
 define <vscale x 4 x i32> @sqadd_s_lowimm(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: sqadd_s_lowimm:
 ; CHECK:       // %bb.0:
@@ -1107,6 +1133,19 @@ define <vscale x 4 x i32> @sqadd_s_highimm(<vscale x 4 x i32> %a) {
   ret <vscale x 4 x i32> %out
 }
 
+; Immediate instruction form only supports positive values.
+define <vscale x 4 x i32> @sqadd_s_negimm(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: sqadd_s_negimm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqsub z0.s, z0.s, #65280 // =0xff00
+; CHECK-NEXT:    ret
+  %elt = insertelement <vscale x 4 x i32> undef, i32 -65280, i32 0
+  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqadd.x.nxv4i32(<vscale x 4 x i32> %a,
+                                                                   <vscale x 4 x i32> %splat)
+  ret <vscale x 4 x i32> %out
+}
+
 define <vscale x 2 x i64> @sqadd_d_lowimm(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: sqadd_d_lowimm:
 ; CHECK:       // %bb.0:
@@ -1131,6 +1170,19 @@ define <vscale x 2 x i64> @sqadd_d_highimm(<vscale x 2 x i64> %a) {
   ret <vscale x 2 x i64> %out
 }
 
+; Immediate instruction form only supports positive values.
+define <vscale x 2 x i64> @sqadd_d_negimm(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: sqadd_d_negimm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqsub z0.d, z0.d, #3840 // =0xf00
+; CHECK-NEXT:    ret
+  %elt = insertelement <vscale x 2 x i64> undef, i64 -3840, i32 0
+  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqadd.x.nxv2i64(<vscale x 2 x i64> %a,
+                                                                   <vscale x 2 x i64> %splat)
+  ret <vscale x 2 x i64> %out
+}
+
 ; SQSUB
 
 define <vscale x 16 x i8> @sqsub_b_lowimm(<vscale x 16 x i8> %a) {
@@ -1145,6 +1197,19 @@ define <vscale x 16 x i8> @sqsub_b_lowimm(<vscale x 16 x i8> %a) {
   ret <vscale x 16 x i8> %out
 }
 
+; Immediate instruction form only supports positive values.
+define <vscale x 16 x i8> @sqsub_b_negimm(<vscale x 16 x i8> %a) {
+; CHECK-LABEL: sqsub_b_negimm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqadd z0.b, z0.b, #1 // =0x1
+; CHECK-NEXT:    ret
+  %elt = insertelement <vscale x 16 x i8> undef, i8 -1, i32 0
+  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.x.nxv16i8(<vscale x 16 x i8> %a,
+                                                                   <vscale x 16 x i8> %splat)
+  ret <vscale x 16 x i8> %out
+}
+
 define <vscale x 8 x i16> @sqsub_h_lowimm(<vscale x 8 x i16> %a) {
 ; CHECK-LABEL: sqsub_h_lowimm:
 ; CHECK:       // %bb.0:
@@ -1169,6 +1234,19 @@ define <vscale x 8 x i16> @sqsub_h_highimm(<vscale x 8 x i16> %a) {
   ret <vscale x 8 x i16> %out
 }
 
+; Immediate instruction form only supports positive values.
+define <vscale x 8 x i16> @sqsub_h_negimm(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: sqsub_h_negimm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqadd z0.h, z0.h, #128 // =0x80
+; CHECK-NEXT:    ret
+  %elt = insertelement <vscale x 8 x i16> undef, i16 -128, i32 0
+  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.x.nxv8i16(<vscale x 8 x i16> %a,
+                                                                   <vscale x 8 x i16> %splat)
+  ret <vscale x 8 x i16> %out
+}
+
 define <vscale x 4 x i32> @sqsub_s_lowimm(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: sqsub_s_lowimm:
 ; CHECK:       // %bb.0:
@@ -1193,6 +1271,19 @@ define <vscale x 4 x i32> @sqsub_s_highimm(<vscale x 4 x i32> %a) {
   ret <vscale x 4 x i32> %out
 }
 
+; Immediate instruction form only supports positive values.
+define <vscale x 4 x i32> @sqsub_s_negimm(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: sqsub_s_negimm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqadd z0.s, z0.s, #32768 // =0x8000
+; CHECK-NEXT:    ret
+  %elt = insertelement <vscale x 4 x i32> undef, i32 -32768, i32 0
+  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.x.nxv4i32(<vscale x 4 x i32> %a,
+                                                                   <vscale x 4 x i32> %splat)
+  ret <vscale x 4 x i32> %out
+}
+
 define <vscale x 2 x i64> @sqsub_d_lowimm(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: sqsub_d_lowimm:
 ; CHECK:       // %bb.0:
@@ -1217,6 +1308,19 @@ define <vscale x 2 x i64> @sqsub_d_highimm(<vscale x 2 x i64> %a) {
   ret <vscale x 2 x i64> %out
 }
 
+; Immediate instruction form only supports positive values.
+define <vscale x 2 x i64> @sqsub_d_negimm(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: sqsub_d_negimm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqadd z0.d, z0.d, #57344 // =0xe000
+; CHECK-NEXT:    ret
+  %elt = insertelement <vscale x 2 x i64> undef, i64 -57344, i32 0
+  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.x.nxv2i64(<vscale x 2 x i64> %a,
+                                                                   <vscale x 2 x i64> %splat)
+  ret <vscale x 2 x i64> %out
+}
+
 ; UQADD
 
 define <vscale x 16 x i8> @uqadd_b_lowimm(<vscale x 16 x i8> %a) {


        

