[llvm] r356332 - [TargetLowering] improve the default expansion of uaddsat/usubsat

Sanjay Patel via llvm-commits llvm-commits at lists.llvm.org
Sun Mar 17 07:57:40 PDT 2019


Author: spatel
Date: Sun Mar 17 07:57:40 2019
New Revision: 356332

URL: http://llvm.org/viewvc/llvm-project?rev=356332&view=rev
Log:
[TargetLowering] improve the default expansion of uaddsat/usubsat

This is a subset of what was proposed in:
D59006
...and may overlap with test changes from:
D59174
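...but it seems like a good general optimization to turn selects
into bitwise logic when possible, because we never know exactly
what can happen at this stage of DAG combining; that depends on
how the target has defined things.

Concretely, when the target's boolean contents for the type are
ZeroOrNegativeOne, the overflow flag sign-extended to the result
type is an all-ones/all-zeros mask, so:
  uaddsat: Overflow ? -1 : (LHS + RHS)  -->  (LHS + RHS) | OverflowMask
  usubsat: Overflow ?  0 : (LHS - RHS)  -->  (LHS - RHS) & ~OverflowMask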

Differential Revision: https://reviews.llvm.org/D59066

Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
    llvm/trunk/test/CodeGen/AArch64/uadd_sat_vec.ll

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp?rev=356332&r1=356331&r2=356332&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp Sun Mar 17 07:57:40 2019
@@ -5426,9 +5426,20 @@ SDValue TargetLowering::expandAddSubSat(
   SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);
 
   if (Opcode == ISD::UADDSAT) {
+    if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
+      // (LHS + RHS) | OverflowMask
+      SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
+      return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
+    }
     // Overflow ? 0xffff.... : (LHS + RHS)
     return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
   } else if (Opcode == ISD::USUBSAT) {
+    if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
+      // (LHS - RHS) & ~OverflowMask
+      SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
+      SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
+      return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
+    }
     // Overflow ? 0 : (LHS - RHS)
     return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
   } else {

Modified: llvm/trunk/test/CodeGen/AArch64/uadd_sat_vec.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/uadd_sat_vec.ll?rev=356332&r1=356331&r2=356332&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/uadd_sat_vec.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/uadd_sat_vec.ll Sun Mar 17 07:57:40 2019
@@ -404,8 +404,7 @@ define <2 x i64> @v2i64(<2 x i64> %x, <2
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    add v1.2d, v0.2d, v1.2d
 ; CHECK-NEXT:    cmhi v0.2d, v0.2d, v1.2d
-; CHECK-NEXT:    bic v1.16b, v1.16b, v0.16b
-; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    orr v0.16b, v1.16b, v0.16b
 ; CHECK-NEXT:    ret
   %z = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> %x, <2 x i64> %y)
   ret <2 x i64> %z
@@ -418,10 +417,8 @@ define <4 x i64> @v4i64(<4 x i64> %x, <4
 ; CHECK-NEXT:    add v3.2d, v1.2d, v3.2d
 ; CHECK-NEXT:    cmhi v0.2d, v0.2d, v2.2d
 ; CHECK-NEXT:    cmhi v1.2d, v1.2d, v3.2d
-; CHECK-NEXT:    bic v2.16b, v2.16b, v0.16b
-; CHECK-NEXT:    bic v3.16b, v3.16b, v1.16b
-; CHECK-NEXT:    orr v0.16b, v0.16b, v2.16b
-; CHECK-NEXT:    orr v1.16b, v1.16b, v3.16b
+; CHECK-NEXT:    orr v0.16b, v2.16b, v0.16b
+; CHECK-NEXT:    orr v1.16b, v3.16b, v1.16b
 ; CHECK-NEXT:    ret
   %z = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> %x, <4 x i64> %y)
   ret <4 x i64> %z
@@ -438,14 +435,10 @@ define <8 x i64> @v8i64(<8 x i64> %x, <8
 ; CHECK-NEXT:    cmhi v1.2d, v1.2d, v5.2d
 ; CHECK-NEXT:    cmhi v2.2d, v2.2d, v6.2d
 ; CHECK-NEXT:    cmhi v3.2d, v3.2d, v7.2d
-; CHECK-NEXT:    bic v4.16b, v4.16b, v0.16b
-; CHECK-NEXT:    bic v5.16b, v5.16b, v1.16b
-; CHECK-NEXT:    bic v6.16b, v6.16b, v2.16b
-; CHECK-NEXT:    bic v7.16b, v7.16b, v3.16b
-; CHECK-NEXT:    orr v0.16b, v0.16b, v4.16b
-; CHECK-NEXT:    orr v1.16b, v1.16b, v5.16b
-; CHECK-NEXT:    orr v2.16b, v2.16b, v6.16b
-; CHECK-NEXT:    orr v3.16b, v3.16b, v7.16b
+; CHECK-NEXT:    orr v0.16b, v4.16b, v0.16b
+; CHECK-NEXT:    orr v1.16b, v5.16b, v1.16b
+; CHECK-NEXT:    orr v2.16b, v6.16b, v2.16b
+; CHECK-NEXT:    orr v3.16b, v7.16b, v3.16b
 ; CHECK-NEXT:    ret
   %z = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> %x, <8 x i64> %y)
   ret <8 x i64> %z




More information about the llvm-commits mailing list