[llvm] r289629 - [X86][InstCombine] Teach SimplifyDemandedVectorElts to handle scalar round intrinsics more correctly.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 13 19:17:30 PST 2016
Author: ctopper
Date: Tue Dec 13 21:17:30 2016
New Revision: 289629
URL: http://llvm.org/viewvc/llvm-project?rev=289629&view=rev
Log:
[X86][InstCombine] Teach SimplifyDemandedVectorElts to handle scalar round intrinsics more correctly.
Now we only pass bit 0 of the DemandedElts to optimize operand 1 as we recurse since the upper bits are unused. Similarly we clear bit 0 for optimizing operand 0.
Also calculate UndefElts correctly.
Simplify InstCombineCalls for these instrinics to just call SimplifyDemandedVectorElts for the call instrution to reuse this support.
Modified:
llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp?rev=289629&r1=289628&r2=289629&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp Tue Dec 13 21:17:30 2016
@@ -1437,12 +1437,6 @@ Instruction *InstCombiner::visitCallInst
APInt DemandedElts = APInt::getLowBitsSet(Width, DemandedWidth);
return SimplifyDemandedVectorElts(Op, DemandedElts, UndefElts);
};
- auto SimplifyDemandedVectorEltsHigh = [this](Value *Op, unsigned Width,
- unsigned DemandedWidth) {
- APInt UndefElts(Width, 0);
- APInt DemandedElts = APInt::getHighBitsSet(Width, DemandedWidth);
- return SimplifyDemandedVectorElts(Op, DemandedElts, UndefElts);
- };
switch (II->getIntrinsicID()) {
default: break;
@@ -1799,33 +1793,14 @@ Instruction *InstCombiner::visitCallInst
break;
}
- case Intrinsic::x86_sse41_round_ss:
- case Intrinsic::x86_sse41_round_sd: {
- // These intrinsics demand the upper elements of the first input vector and
- // the lowest element of the second input vector.
- bool MadeChange = false;
- Value *Arg0 = II->getArgOperand(0);
- Value *Arg1 = II->getArgOperand(1);
- unsigned VWidth = Arg0->getType()->getVectorNumElements();
- if (Value *V = SimplifyDemandedVectorEltsHigh(Arg0, VWidth, VWidth - 1)) {
- II->setArgOperand(0, V);
- MadeChange = true;
- }
- if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, 1)) {
- II->setArgOperand(1, V);
- MadeChange = true;
- }
- if (MadeChange)
- return II;
- break;
- }
-
case Intrinsic::x86_sse_cmp_ss:
case Intrinsic::x86_sse_min_ss:
case Intrinsic::x86_sse_max_ss:
case Intrinsic::x86_sse2_cmp_sd:
case Intrinsic::x86_sse2_min_sd:
case Intrinsic::x86_sse2_max_sd:
+ case Intrinsic::x86_sse41_round_ss:
+ case Intrinsic::x86_sse41_round_sd:
case Intrinsic::x86_xop_vfrcz_ss:
case Intrinsic::x86_xop_vfrcz_sd: {
unsigned VWidth = II->getType()->getVectorNumElements();
Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp?rev=289629&r1=289628&r2=289629&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp Tue Dec 13 21:17:30 2016
@@ -1321,25 +1321,33 @@ Value *InstCombiner::SimplifyDemandedVec
break;
}
- // Binary scalar-as-vector operations that work column-wise. A dest element
- // is a function of the corresponding input elements from the two inputs.
+ // Binary scalar-as-vector operations that work column-wise. The high
+ // elements come from operand 0 and the low element comes from operand 1.
case Intrinsic::x86_sse41_round_ss:
- case Intrinsic::x86_sse41_round_sd:
- TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
+ case Intrinsic::x86_sse41_round_sd: {
+ // Don't use the low element of operand 0.
+ APInt DemandedElts2 = DemandedElts;
+ DemandedElts2.clearBit(0);
+ TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts2,
UndefElts, Depth + 1);
if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
- TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
- UndefElts2, Depth + 1);
- if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
// If lowest element of a scalar op isn't used then use Arg0.
- if (DemandedElts.getLoBits(1) != 1)
+ if (!DemandedElts[0])
return II->getArgOperand(0);
- // Output elements are undefined if both are undefined. Consider things
- // like undef&0. The result is known zero, not undef.
- UndefElts &= UndefElts2;
+ // Only lower element is used for operand 1.
+ DemandedElts = 1;
+ TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
+ UndefElts2, Depth + 1);
+ if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
+
+ // Take the high undef elements from operand 0 and take the lower element
+ // from operand 1.
+ UndefElts.clearBit(0);
+ UndefElts |= UndefElts2[0];
break;
+ }
case Intrinsic::x86_fma_vfmadd_ss:
case Intrinsic::x86_fma_vfmsub_ss:
More information about the llvm-commits
mailing list