[llvm-branch-commits] [llvm] 0637dfe - [DAG] Legalize abs(x) -> smax(x, sub(0, x)) iff smax/sub are legal

Simon Pilgrim via llvm-branch-commits <llvm-branch-commits at lists.llvm.org>
Wed Nov 25 07:12:14 PST 2020


Author: Simon Pilgrim
Date: 2020-11-25T15:03:03Z
New Revision: 0637dfe88b02a68f27fbad16ea47b26f91a47c61

URL: https://github.com/llvm/llvm-project/commit/0637dfe88b02a68f27fbad16ea47b26f91a47c61
DIFF: https://github.com/llvm/llvm-project/commit/0637dfe88b02a68f27fbad16ea47b26f91a47c61.diff

LOG: [DAG] Legalize abs(x) -> smax(x,sub(0,x)) iff smax/sub are legal

If smax() is legal, this is likely to result in a smaller codegen expansion for abs(x) than the xor(add,ashr) method.
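
For illustration, the two scalar expansions being compared can be sketched in plain C++ as below (a minimal sketch only; the function names are made up and this is not the DAG-building code from the patch, which operates on ISD nodes):

    #include <cstdint>

    // Generic expansion used when SMAX is not available:
    //   sign = ashr(x, bits-1);  abs = xor(add(x, sign), sign)
    int32_t abs_via_ashr_add_xor(int32_t x) {
      int32_t sign = x >> 31;     // all-ones if x is negative, zero otherwise
      return (x + sign) ^ sign;   // conditionally negate: shift + add + xor (3 ops)
    }

    // Preferred expansion when both SUB and SMAX are legal:
    //   abs = smax(x, sub(0, x))
    int32_t abs_via_smax(int32_t x) {
      int32_t neg = 0 - x;        // sub(0, x); in the DAG this wraps for INT_MIN (UB in C++)
      return neg > x ? neg : x;   // smax: sub + max (2 ops)
    }

Under two's-complement wrap semantics both forms agree on every input, including INT_MIN (the Alive2 link below has the formal proof), but the smax form needs one fewer operation when the target has a native signed max.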

This is also what PowerPC has been doing for its abs implementation, so it lets us remove a sizeable amount of custom lowering code there (code which was never updated when smax lowering was added).

Alive2: https://alive2.llvm.org/ce/z/xRk3cD

Differential Revision: https://reviews.llvm.org/D92095

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/lib/Target/PowerPC/PPCISelLowering.h
    llvm/test/CodeGen/X86/abs.ll
    llvm/test/CodeGen/X86/combine-abs.ll
    llvm/test/CodeGen/X86/viabs.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 93df88b3f6d7..0b1c25764549 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -6822,6 +6822,14 @@ bool TargetLowering::expandABS(SDNode *N, SDValue &Result,
   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
   SDValue Op = N->getOperand(0);
 
+  // abs(x) -> smax(x,sub(0,x))
+  if (isOperationLegal(ISD::SUB, VT) && isOperationLegal(ISD::SMAX, VT)) {
+    SDValue Zero = DAG.getConstant(0, dl, VT);
+    Result = DAG.getNode(ISD::SMAX, dl, VT, Op,
+                         DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
+    return true;
+  }
+
   // Only expand vector types if we have the appropriate vector operations.
   if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SRA, VT) ||
                         !isOperationLegalOrCustom(ISD::ADD, VT) ||

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index d19fbd477d77..1f3ef3efa2c2 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -806,9 +806,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
       setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
     }
 
-    for (auto VT : {MVT::v2i64, MVT::v4i32, MVT::v8i16, MVT::v16i8})
-      setOperationAction(ISD::ABS, VT, Custom);
-
     // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
     // with merges, splats, etc.
     setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
@@ -841,11 +838,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
     setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
     setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
 
-    // Without hasP8Altivec set, v2i64 SMAX isn't available.
-    // But ABS custom lowering requires SMAX support.
-    if (!Subtarget.hasP8Altivec())
-      setOperationAction(ISD::ABS, MVT::v2i64, Expand);
-
     // Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
     setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
     // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
@@ -10886,44 +10878,6 @@ SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
   }
 }
 
-SDValue PPCTargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
-
-  assert(Op.getOpcode() == ISD::ABS && "Should only be called for ISD::ABS");
-
-  EVT VT = Op.getValueType();
-  assert(VT.isVector() &&
-         "Only set vector abs as custom, scalar abs shouldn't reach here!");
-  assert((VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
-          VT == MVT::v16i8) &&
-         "Unexpected vector element type!");
-  assert((VT != MVT::v2i64 || Subtarget.hasP8Altivec()) &&
-         "Current subtarget doesn't support smax v2i64!");
-
-  // For vector abs, it can be lowered to:
-  // abs x
-  // ==>
-  // y = -x
-  // smax(x, y)
-
-  SDLoc dl(Op);
-  SDValue X = Op.getOperand(0);
-  SDValue Zero = DAG.getConstant(0, dl, VT);
-  SDValue Y = DAG.getNode(ISD::SUB, dl, VT, Zero, X);
-
-  // SMAX patch https://reviews.llvm.org/D47332
-  // hasn't landed yet, so use intrinsic first here.
-  // TODO: Should use SMAX directly once SMAX patch landed
-  Intrinsic::ID BifID = Intrinsic::ppc_altivec_vmaxsw;
-  if (VT == MVT::v2i64)
-    BifID = Intrinsic::ppc_altivec_vmaxsd;
-  else if (VT == MVT::v8i16)
-    BifID = Intrinsic::ppc_altivec_vmaxsh;
-  else if (VT == MVT::v16i8)
-    BifID = Intrinsic::ppc_altivec_vmaxsb;
-
-  return BuildIntrinsicOp(BifID, X, Y, DAG, dl, VT);
-}
-
 // Custom lowering for fpext vf32 to v2f64
 SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
 
@@ -11059,7 +11013,6 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
   case ISD::INSERT_VECTOR_ELT:  return LowerINSERT_VECTOR_ELT(Op, DAG);
   case ISD::MUL:                return LowerMUL(Op, DAG);
-  case ISD::ABS:                return LowerABS(Op, DAG);
   case ISD::FP_EXTEND:          return LowerFP_EXTEND(Op, DAG);
   case ISD::ROTL:               return LowerROTL(Op, DAG);
 

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 6c4899fae22c..8fad97c618b7 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1157,7 +1157,6 @@ namespace llvm {
     SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const;
 

diff --git a/llvm/test/CodeGen/X86/abs.ll b/llvm/test/CodeGen/X86/abs.ll
index 8e20b001cc3e..942f09f9341b 100644
--- a/llvm/test/CodeGen/X86/abs.ll
+++ b/llvm/test/CodeGen/X86/abs.ll
@@ -397,10 +397,9 @@ define <8 x i32> @test_v8i32(<8 x i32> %a) nounwind {
 define <8 x i16> @test_v8i16(<8 x i16> %a) nounwind {
 ; SSE-LABEL: test_v8i16:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movdqa %xmm0, %xmm1
-; SSE-NEXT:    psraw $15, %xmm1
-; SSE-NEXT:    paddw %xmm1, %xmm0
-; SSE-NEXT:    pxor %xmm1, %xmm0
+; SSE-NEXT:    pxor %xmm1, %xmm1
+; SSE-NEXT:    psubw %xmm0, %xmm1
+; SSE-NEXT:    pmaxsw %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: test_v8i16:

diff --git a/llvm/test/CodeGen/X86/combine-abs.ll b/llvm/test/CodeGen/X86/combine-abs.ll
index af399e303704..da59da48a919 100644
--- a/llvm/test/CodeGen/X86/combine-abs.ll
+++ b/llvm/test/CodeGen/X86/combine-abs.ll
@@ -55,10 +55,9 @@ define i32 @combine_i32_abs_abs(i32 %a) {
 define <8 x i16> @combine_v8i16_abs_abs(<8 x i16> %a) {
 ; SSE2-LABEL: combine_v8i16_abs_abs:
 ; SSE2:       # %bb.0:
-; SSE2-NEXT:    movdqa %xmm0, %xmm1
-; SSE2-NEXT:    psraw $15, %xmm1
-; SSE2-NEXT:    paddw %xmm1, %xmm0
-; SSE2-NEXT:    pxor %xmm1, %xmm0
+; SSE2-NEXT:    pxor %xmm1, %xmm1
+; SSE2-NEXT:    psubw %xmm0, %xmm1
+; SSE2-NEXT:    pmaxsw %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSE42-LABEL: combine_v8i16_abs_abs:

diff --git a/llvm/test/CodeGen/X86/viabs.ll b/llvm/test/CodeGen/X86/viabs.ll
index 588246d7ded6..873dea24ecdd 100644
--- a/llvm/test/CodeGen/X86/viabs.ll
+++ b/llvm/test/CodeGen/X86/viabs.ll
@@ -88,10 +88,9 @@ define <4 x i32> @test_abs_ge_v4i32(<4 x i32> %a) nounwind {
 define <8 x i16> @test_abs_gt_v8i16(<8 x i16> %a) nounwind {
 ; SSE2-LABEL: test_abs_gt_v8i16:
 ; SSE2:       # %bb.0:
-; SSE2-NEXT:    movdqa %xmm0, %xmm1
-; SSE2-NEXT:    psraw $15, %xmm1
-; SSE2-NEXT:    paddw %xmm1, %xmm0
-; SSE2-NEXT:    pxor %xmm1, %xmm0
+; SSE2-NEXT:    pxor %xmm1, %xmm1
+; SSE2-NEXT:    psubw %xmm0, %xmm1
+; SSE2-NEXT:    pmaxsw %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSSE3-LABEL: test_abs_gt_v8i16:
@@ -311,14 +310,12 @@ define <8 x i32> @test_abs_ge_v8i32(<8 x i32> %a) nounwind {
 define <16 x i16> @test_abs_gt_v16i16(<16 x i16> %a) nounwind {
 ; SSE2-LABEL: test_abs_gt_v16i16:
 ; SSE2:       # %bb.0:
-; SSE2-NEXT:    movdqa %xmm0, %xmm2
-; SSE2-NEXT:    psraw $15, %xmm2
-; SSE2-NEXT:    paddw %xmm2, %xmm0
-; SSE2-NEXT:    pxor %xmm2, %xmm0
-; SSE2-NEXT:    movdqa %xmm1, %xmm2
-; SSE2-NEXT:    psraw $15, %xmm2
-; SSE2-NEXT:    paddw %xmm2, %xmm1
-; SSE2-NEXT:    pxor %xmm2, %xmm1
+; SSE2-NEXT:    pxor %xmm2, %xmm2
+; SSE2-NEXT:    pxor %xmm3, %xmm3
+; SSE2-NEXT:    psubw %xmm0, %xmm3
+; SSE2-NEXT:    pmaxsw %xmm3, %xmm0
+; SSE2-NEXT:    psubw %xmm1, %xmm2
+; SSE2-NEXT:    pmaxsw %xmm2, %xmm1
 ; SSE2-NEXT:    retq
 ;
 ; SSSE3-LABEL: test_abs_gt_v16i16:
@@ -948,22 +945,18 @@ define <64 x i8> @test_abs_lt_v64i8(<64 x i8> %a) nounwind {
 define <32 x i16> @test_abs_gt_v32i16(<32 x i16> %a) nounwind {
 ; SSE2-LABEL: test_abs_gt_v32i16:
 ; SSE2:       # %bb.0:
-; SSE2-NEXT:    movdqa %xmm0, %xmm4
-; SSE2-NEXT:    psraw $15, %xmm4
-; SSE2-NEXT:    paddw %xmm4, %xmm0
-; SSE2-NEXT:    pxor %xmm4, %xmm0
-; SSE2-NEXT:    movdqa %xmm1, %xmm4
-; SSE2-NEXT:    psraw $15, %xmm4
-; SSE2-NEXT:    paddw %xmm4, %xmm1
-; SSE2-NEXT:    pxor %xmm4, %xmm1
-; SSE2-NEXT:    movdqa %xmm2, %xmm4
-; SSE2-NEXT:    psraw $15, %xmm4
-; SSE2-NEXT:    paddw %xmm4, %xmm2
-; SSE2-NEXT:    pxor %xmm4, %xmm2
-; SSE2-NEXT:    movdqa %xmm3, %xmm4
-; SSE2-NEXT:    psraw $15, %xmm4
-; SSE2-NEXT:    paddw %xmm4, %xmm3
-; SSE2-NEXT:    pxor %xmm4, %xmm3
+; SSE2-NEXT:    pxor %xmm4, %xmm4
+; SSE2-NEXT:    pxor %xmm5, %xmm5
+; SSE2-NEXT:    psubw %xmm0, %xmm5
+; SSE2-NEXT:    pmaxsw %xmm5, %xmm0
+; SSE2-NEXT:    pxor %xmm5, %xmm5
+; SSE2-NEXT:    psubw %xmm1, %xmm5
+; SSE2-NEXT:    pmaxsw %xmm5, %xmm1
+; SSE2-NEXT:    pxor %xmm5, %xmm5
+; SSE2-NEXT:    psubw %xmm2, %xmm5
+; SSE2-NEXT:    pmaxsw %xmm5, %xmm2
+; SSE2-NEXT:    psubw %xmm3, %xmm4
+; SSE2-NEXT:    pmaxsw %xmm4, %xmm3
 ; SSE2-NEXT:    retq
 ;
 ; SSSE3-LABEL: test_abs_gt_v32i16:


        

