[PATCH] D40984: [InstCombine] canonicalize shifty abs(): ashr+add+xor --> cmp+neg+sel
Sanjay Patel via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 7 14:01:07 PST 2017
spatel created this revision.
Herald added a subscriber: mcrosier.
We want to do this for 2 reasons:
1. Value tracking does not recognize the ashr variant, so it would fail to match for cases like https://reviews.llvm.org/D39766.
2. DAGCombiner tries to recognize the ashr variant for scalars, but not vectors. For vectors, we only have:
// Canonicalize integer abs.
// vselect (setg[te] X, 0), X, -X ->
// vselect (setgt X, -1), X, -X ->
// vselect (setl[te] X, 0), -X, X ->
// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
(the comment isn't accurate - we'll produce an ISD::ABS node if it's legal or custom)
But even for scalars, it doesn't handle commuted variants (see DAGCombiner under comment):
// fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
So it should work if you start with a cmp+sel pattern because we do this:
// Check to see if this is an integer abs.
// select_cc setg[te] X, 0, X, -X ->
// select_cc setgt X, -1, X, -X ->
// select_cc setl[te] X, 0, -X, X ->
// select_cc setlt X, 1, -X, X ->
// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
but it allows other cases to fall though the cracks:
define i32 @abs_shifty(i32 %x) {
%signbit = ashr i32 %x, 31
%add = add i32 %signbit, %x
%abs = xor i32 %signbit, %add
ret i32 %abs
}
define i32 @abs_cmpsubsel(i32 %x) {
%cmp = icmp slt i32 %x, zeroinitializer
%sub = sub i32 zeroinitializer, %x
%abs = select i1 %cmp, i32 %sub, i32 %x
ret i32 %abs
}
define <4 x i32> @abs_shifty_vec(<4 x i32> %x) {
%signbit = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
%add = add <4 x i32> %signbit, %x
%abs = xor <4 x i32> %signbit, %add
ret <4 x i32> %abs
}
define <4 x i32> @abs_cmpsubsel_vec(<4 x i32> %x) {
%cmp = icmp slt <4 x i32> %x, zeroinitializer
%sub = sub <4 x i32> zeroinitializer, %x
%abs = select <4 x i1> %cmp, <4 x i32> %sub, <4 x i32> %x
ret <4 x i32> %abs
}
$ ./llc -o - -mattr=avx abs.ll
_abs_shifty:
movl %edi, %eax
sarl $31, %eax
addl %eax, %edi
xorl %eax, %edi
movl %edi, %eax
retq
_abs_cmpsubsel:
movl %edi, %eax
negl %eax
cmovll %edi, %eax
retq
.cfi_endproc
_abs_shifty_vec:
vpsrad $31, %xmm0, %xmm1
vpaddd %xmm0, %xmm1, %xmm0
vpxor %xmm0, %xmm1, %xmm0
retq
_abs_cmpsubsel_vec:
vpabsd %xmm0, %xmm0
retq
https://reviews.llvm.org/D40984
Files:
lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
test/Transforms/InstCombine/abs-1.ll
Index: test/Transforms/InstCombine/abs-1.ll
===================================================================
--- test/Transforms/InstCombine/abs-1.ll
+++ test/Transforms/InstCombine/abs-1.ll
@@ -48,9 +48,9 @@
define i8 @shifty_abs_commute0(i8 %x) {
; CHECK-LABEL: @shifty_abs_commute0(
-; CHECK-NEXT: [[SIGNBIT:%.*]] = ashr i8 %x, 7
-; CHECK-NEXT: [[ADD:%.*]] = add i8 [[SIGNBIT]], %x
-; CHECK-NEXT: [[ABS:%.*]] = xor i8 [[ADD]], [[SIGNBIT]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i8 %x, 0
+; CHECK-NEXT: [[TMP2:%.*]] = sub i8 0, %x
+; CHECK-NEXT: [[ABS:%.*]] = select i1 [[TMP1]], i8 [[TMP2]], i8 %x
; CHECK-NEXT: ret i8 [[ABS]]
;
%signbit = ashr i8 %x, 7
@@ -61,9 +61,9 @@
define <2 x i8> @shifty_abs_commute1(<2 x i8> %x) {
; CHECK-LABEL: @shifty_abs_commute1(
-; CHECK-NEXT: [[SIGNBIT:%.*]] = ashr <2 x i8> %x, <i8 7, i8 7>
-; CHECK-NEXT: [[ADD:%.*]] = add <2 x i8> [[SIGNBIT]], %x
-; CHECK-NEXT: [[ABS:%.*]] = xor <2 x i8> [[SIGNBIT]], [[ADD]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i8> %x, zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = sub <2 x i8> zeroinitializer, %x
+; CHECK-NEXT: [[ABS:%.*]] = select <2 x i1> [[TMP1]], <2 x i8> [[TMP2]], <2 x i8> %x
; CHECK-NEXT: ret <2 x i8> [[ABS]]
;
%signbit = ashr <2 x i8> %x, <i8 7, i8 7>
@@ -75,9 +75,9 @@
define <2 x i8> @shifty_abs_commute2(<2 x i8> %x) {
; CHECK-LABEL: @shifty_abs_commute2(
; CHECK-NEXT: [[Y:%.*]] = mul <2 x i8> %x, <i8 3, i8 3>
-; CHECK-NEXT: [[SIGNBIT:%.*]] = ashr <2 x i8> [[Y]], <i8 7, i8 7>
-; CHECK-NEXT: [[ADD:%.*]] = add <2 x i8> [[Y]], [[SIGNBIT]]
-; CHECK-NEXT: [[ABS:%.*]] = xor <2 x i8> [[SIGNBIT]], [[ADD]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i8> [[Y]], zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = sub <2 x i8> zeroinitializer, [[Y]]
+; CHECK-NEXT: [[ABS:%.*]] = select <2 x i1> [[TMP1]], <2 x i8> [[TMP2]], <2 x i8> [[Y]]
; CHECK-NEXT: ret <2 x i8> [[ABS]]
;
%y = mul <2 x i8> %x, <i8 3, i8 3> ; extra op to thwart complexity-based canonicalization
@@ -90,9 +90,9 @@
define i8 @shifty_abs_commute3(i8 %x) {
; CHECK-LABEL: @shifty_abs_commute3(
; CHECK-NEXT: [[Y:%.*]] = mul i8 %x, 3
-; CHECK-NEXT: [[SIGNBIT:%.*]] = ashr i8 [[Y]], 7
-; CHECK-NEXT: [[ADD:%.*]] = add i8 [[Y]], [[SIGNBIT]]
-; CHECK-NEXT: [[ABS:%.*]] = xor i8 [[ADD]], [[SIGNBIT]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i8 [[Y]], 0
+; CHECK-NEXT: [[TMP2:%.*]] = sub i8 0, [[Y]]
+; CHECK-NEXT: [[ABS:%.*]] = select i1 [[TMP1]], i8 [[TMP2]], i8 [[Y]]
; CHECK-NEXT: ret i8 [[ABS]]
;
%y = mul i8 %x, 3 ; extra op to thwart complexity-based canonicalization
Index: lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -2397,5 +2397,25 @@
if (Instruction *CastedXor = foldCastedBitwiseLogic(I))
return CastedXor;
+ // Canonicalize the shifty way to code absolute value to the common pattern.
+ // There are 4 potential commuted variants. Move the 'ashr' candidate to Op1.
+ // We're relying on the fact that we only do this transform when the shift has
+ // exactly 2 uses and the add has exactly 1 use (otherwise, we might increase
+ // instructions).
+ if (Op0->getNumUses() == 2)
+ std::swap(Op0, Op1);
+
+ const APInt *ShAmt;
+ Type *Ty = I.getType();
+ if (match(Op1, m_AShr(m_Value(A), m_APInt(ShAmt))) &&
+ Op1->getNumUses() == 2 && *ShAmt == Ty->getScalarSizeInBits() - 1 &&
+ match(Op0, m_OneUse(m_c_Add(m_Specific(A), m_Specific(Op1))))) {
+ // B = ashr i32 A, 31 ; smear the sign bit
+ // xor (add A, B), B ; add -1 and flip bits if negative
+ // --> (A < 0) ? -A : A
+ Value *Cmp = Builder.CreateICmpSLT(A, ConstantInt::getNullValue(Ty));
+ return SelectInst::Create(Cmp, Builder.CreateNeg(A), A);
+ }
+
return Changed ? &I : nullptr;
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D40984.125995.patch
Type: text/x-patch
Size: 3990 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20171207/6835bf7e/attachment.bin>
More information about the llvm-commits
mailing list