[PATCH] D35722: [DAGCombine] Improve (sra (sra x, c1), c2) -> (sra x, (add c1, c2)) folding
Simon Pilgrim via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 21 05:43:41 PDT 2017
RKSimon created this revision.
Add support for cases where only some c1+c2 results exceed the max bitshift, clamping accordingly.
Repository:
rL LLVM
https://reviews.llvm.org/D35722
Files:
lib/CodeGen/SelectionDAG/DAGCombiner.cpp
test/CodeGen/X86/combine-sra.ll
Index: test/CodeGen/X86/combine-sra.ll
===================================================================
--- test/CodeGen/X86/combine-sra.ll
+++ test/CodeGen/X86/combine-sra.ll
@@ -131,24 +131,15 @@
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrad $27, %xmm1
; SSE-NEXT: movdqa %xmm0, %xmm2
-; SSE-NEXT: psrad $5, %xmm2
+; SSE-NEXT: psrad $15, %xmm2
; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
-; SSE-NEXT: movdqa %xmm0, %xmm1
-; SSE-NEXT: psrad $31, %xmm1
-; SSE-NEXT: psrad $1, %xmm0
-; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
-; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
-; SSE-NEXT: movdqa %xmm0, %xmm1
-; SSE-NEXT: psrad $10, %xmm1
-; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; SSE-NEXT: psrad $31, %xmm0
-; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
+; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_ashr_ashr3:
; AVX: # BB#0:
; AVX-NEXT: vpsravd {{.*}}(%rip), %xmm0, %xmm0
-; AVX-NEXT: vpsravd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
%1 = ashr <4 x i32> %x, <i32 1, i32 5, i32 50, i32 27>
%2 = ashr <4 x i32> %1, <i32 33, i32 10, i32 33, i32 0>
Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5652,32 +5652,17 @@
}
// fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
- if (N0.getOpcode() == ISD::SRA) {
+ // clamp (add c1, c2) to max shift.
+ if (N0.getOpcode() == ISD::SRA &&
+ isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
+ isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true) &&
+ N1.getValueType() == N0.getOperand(1).getValueType()) {
SDLoc DL(N);
EVT ShiftVT = N1.getValueType();
-
- auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
- ConstantSDNode *RHS) {
- APInt c1 = LHS->getAPIntValue();
- APInt c2 = RHS->getAPIntValue();
- zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
- return (c1 + c2).uge(OpSizeInBits);
- };
- if (matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
- return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0),
- DAG.getConstant(OpSizeInBits - 1, DL, ShiftVT));
-
- auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
- ConstantSDNode *RHS) {
- APInt c1 = LHS->getAPIntValue();
- APInt c2 = RHS->getAPIntValue();
- zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
- return (c1 + c2).ult(OpSizeInBits);
- };
- if (matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
- SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
- return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), Sum);
- }
+ SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
+ SDValue Limit = DAG.getConstant(OpSizeInBits - 1, DL, ShiftVT);
+ SDValue Clamp = DAG.getNode(ISD::UMIN, DL, ShiftVT, Sum, Limit);
+ return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), Clamp);
}
// fold (sra (shl X, m), (sub result_size, n))
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D35722.107659.patch
Type: text/x-patch
Size: 3482 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170721/f9a87d6a/attachment.bin>
More information about the llvm-commits
mailing list