[PATCH] D40984: [InstCombine] canonicalize shifty abs(): ashr+add+xor --> cmp+neg+sel

Sanjay Patel via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 7 14:01:07 PST 2017


spatel created this revision.
Herald added a subscriber: mcrosier.

We want to do this for 2 reasons:

1. Value tracking does not recognize the ashr variant, so it would fail to match for cases like https://reviews.llvm.org/D39766.
2. DAGCombiner tries to recognize the ashr variant for scalars, but not for vectors. For vectors, we only have:

  // Canonicalize integer abs.
  // vselect (setg[te] X,  0),  X, -X ->
  // vselect (setgt    X, -1),  X, -X ->
  // vselect (setl[te] X,  0), -X,  X ->
  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)

(the comment isn't accurate - we'll produce an ISD::ABS node if it's legal or custom)
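
(As an aside, not part of this patch: a small standalone C++ sketch of why the shifty sequence is an abs. The sign-splat Y is -1 when X is negative and 0 otherwise, so (X + Y) ^ Y is ~(X - 1) == -X for negative X and a no-op for non-negative X. It assumes '>>' on a negative signed value is an arithmetic shift, which holds for the compilers/targets we care about; the names are just for illustration.)

  // Standalone check of the identity; not from the patch or the tree.
  #include <cassert>
  #include <cstdint>
  #include <initializer_list>

  static int32_t shiftyAbs(int32_t X) {
    int32_t Y = X >> 31;  // sign splat: -1 if X is negative, 0 otherwise
    // Do the add/xor in unsigned math to sidestep signed-overflow UB.
    return (int32_t)(((uint32_t)X + (uint32_t)Y) ^ (uint32_t)Y);
  }

  static int32_t selectAbs(int32_t X) {
    return X < 0 ? -X : X;  // the canonical cmp+neg+sel form
  }

  int main() {
    // INT32_MIN is skipped only because negating it is UB in C++.
    for (int32_t X : {-100, -1, 0, 1, 100, INT32_MAX, INT32_MIN + 1})
      assert(shiftyAbs(X) == selectAbs(X));
    return 0;
  }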

But even for scalars, it doesn't handle commuted variants (see the code in DAGCombiner under this comment):

  // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)

So it should work out if you start with a cmp+sel pattern, because we do this:

  // Check to see if this is an integer abs.
  // select_cc setg[te] X,  0,  X, -X ->
  // select_cc setgt    X, -1,  X, -X ->
  // select_cc setl[te] X,  0, -X,  X ->
  // select_cc setlt    X,  1, -X,  X ->
  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)

but it allows other cases to fall through the cracks:

  define i32 @abs_shifty(i32 %x) {
    %signbit = ashr i32 %x, 31 
    %add = add i32 %signbit, %x  
    %abs = xor i32 %signbit, %add 
    ret i32 %abs
  }
  
  define i32 @abs_cmpsubsel(i32 %x) {
    %cmp = icmp slt i32 %x, zeroinitializer
    %sub = sub i32 zeroinitializer, %x
    %abs = select i1 %cmp, i32 %sub, i32 %x
    ret i32 %abs
  }
  
  define <4 x i32> @abs_shifty_vec(<4 x i32> %x) {
    %signbit = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31> 
    %add = add <4 x i32> %signbit, %x  
    %abs = xor <4 x i32> %signbit, %add 
    ret <4 x i32> %abs
  }
  
  define <4 x i32> @abs_cmpsubsel_vec(<4 x i32> %x) {
    %cmp = icmp slt <4 x i32> %x, zeroinitializer
    %sub = sub <4 x i32> zeroinitializer, %x
    %abs = select <4 x i1> %cmp, <4 x i32> %sub, <4 x i32> %x
    ret <4 x i32> %abs
  }

$  ./llc -o - -mattr=avx abs.ll

  _abs_shifty:             
  	movl	%edi, %eax
  	sarl	$31, %eax
  	addl	%eax, %edi
  	xorl	%eax, %edi
  	movl	%edi, %eax
  	retq
  
  _abs_cmpsubsel:   
  	movl	%edi, %eax
  	negl	%eax
  	cmovll	%edi, %eax
  	retq
  
  _abs_shifty_vec:            
  	vpsrad	$31, %xmm0, %xmm1
  	vpaddd	%xmm0, %xmm1, %xmm0
  	vpxor	%xmm0, %xmm1, %xmm0
  	retq
  
  
  _abs_cmpsubsel_vec:   
  	vpabsd	%xmm0, %xmm0
  	retq


https://reviews.llvm.org/D40984

Files:
  lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
  test/Transforms/InstCombine/abs-1.ll


Index: test/Transforms/InstCombine/abs-1.ll
===================================================================
--- test/Transforms/InstCombine/abs-1.ll
+++ test/Transforms/InstCombine/abs-1.ll
@@ -48,9 +48,9 @@
 
 define i8 @shifty_abs_commute0(i8 %x) {
 ; CHECK-LABEL: @shifty_abs_commute0(
-; CHECK-NEXT:    [[SIGNBIT:%.*]] = ashr i8 %x, 7
-; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[SIGNBIT]], %x
-; CHECK-NEXT:    [[ABS:%.*]] = xor i8 [[ADD]], [[SIGNBIT]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i8 %x, 0
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i8 0, %x
+; CHECK-NEXT:    [[ABS:%.*]] = select i1 [[TMP1]], i8 [[TMP2]], i8 %x
 ; CHECK-NEXT:    ret i8 [[ABS]]
 ;
   %signbit = ashr i8 %x, 7
@@ -61,9 +61,9 @@
 
 define <2 x i8> @shifty_abs_commute1(<2 x i8> %x) {
 ; CHECK-LABEL: @shifty_abs_commute1(
-; CHECK-NEXT:    [[SIGNBIT:%.*]] = ashr <2 x i8> %x, <i8 7, i8 7>
-; CHECK-NEXT:    [[ADD:%.*]] = add <2 x i8> [[SIGNBIT]], %x
-; CHECK-NEXT:    [[ABS:%.*]] = xor <2 x i8> [[SIGNBIT]], [[ADD]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <2 x i8> %x, zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = sub <2 x i8> zeroinitializer, %x
+; CHECK-NEXT:    [[ABS:%.*]] = select <2 x i1> [[TMP1]], <2 x i8> [[TMP2]], <2 x i8> %x
 ; CHECK-NEXT:    ret <2 x i8> [[ABS]]
 ;
   %signbit = ashr <2 x i8> %x, <i8 7, i8 7>
@@ -75,9 +75,9 @@
 define <2 x i8> @shifty_abs_commute2(<2 x i8> %x) {
 ; CHECK-LABEL: @shifty_abs_commute2(
 ; CHECK-NEXT:    [[Y:%.*]] = mul <2 x i8> %x, <i8 3, i8 3>
-; CHECK-NEXT:    [[SIGNBIT:%.*]] = ashr <2 x i8> [[Y]], <i8 7, i8 7>
-; CHECK-NEXT:    [[ADD:%.*]] = add <2 x i8> [[Y]], [[SIGNBIT]]
-; CHECK-NEXT:    [[ABS:%.*]] = xor <2 x i8> [[SIGNBIT]], [[ADD]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <2 x i8> [[Y]], zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = sub <2 x i8> zeroinitializer, [[Y]]
+; CHECK-NEXT:    [[ABS:%.*]] = select <2 x i1> [[TMP1]], <2 x i8> [[TMP2]], <2 x i8> [[Y]]
 ; CHECK-NEXT:    ret <2 x i8> [[ABS]]
 ;
   %y = mul <2 x i8> %x, <i8 3, i8 3>   ; extra op to thwart complexity-based canonicalization
@@ -90,9 +90,9 @@
 define i8 @shifty_abs_commute3(i8 %x) {
 ; CHECK-LABEL: @shifty_abs_commute3(
 ; CHECK-NEXT:    [[Y:%.*]] = mul i8 %x, 3
-; CHECK-NEXT:    [[SIGNBIT:%.*]] = ashr i8 [[Y]], 7
-; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[Y]], [[SIGNBIT]]
-; CHECK-NEXT:    [[ABS:%.*]] = xor i8 [[ADD]], [[SIGNBIT]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i8 [[Y]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i8 0, [[Y]]
+; CHECK-NEXT:    [[ABS:%.*]] = select i1 [[TMP1]], i8 [[TMP2]], i8 [[Y]]
 ; CHECK-NEXT:    ret i8 [[ABS]]
 ;
   %y = mul i8 %x, 3                    ; extra op to thwart complexity-based canonicalization
Index: lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -2397,5 +2397,25 @@
   if (Instruction *CastedXor = foldCastedBitwiseLogic(I))
     return CastedXor;
 
+  // Canonicalize the shifty way to code absolute value to the common pattern.
+  // There are 4 potential commuted variants. Move the 'ashr' candidate to Op1.
+  // We're relying on the fact that we only do this transform when the shift has
+  // exactly 2 uses and the add has exactly 1 use (otherwise, we might increase
+  // instructions).
+  if (Op0->getNumUses() == 2)
+    std::swap(Op0, Op1);
+
+  const APInt *ShAmt;
+  Type *Ty = I.getType();
+  if (match(Op1, m_AShr(m_Value(A), m_APInt(ShAmt))) &&
+      Op1->getNumUses() == 2 && *ShAmt == Ty->getScalarSizeInBits() - 1 &&
+      match(Op0, m_OneUse(m_c_Add(m_Specific(A), m_Specific(Op1))))) {
+    // B = ashr i32 A, 31 ; smear the sign bit
+    // xor (add A, B), B  ; add -1 and flip bits if negative
+    // --> (A < 0) ? -A : A
+    Value *Cmp = Builder.CreateICmpSLT(A, ConstantInt::getNullValue(Ty));
+    return SelectInst::Create(Cmp, Builder.CreateNeg(A), A);
+  }
+
   return Changed ? &I : nullptr;
 }

