[PATCH] Revert 221429 and fix bug in TargetLowering::SimplifyDemandedBits
Michael Kuperstein
michael.m.kuperstein at intel.com
Wed Feb 18 01:45:41 PST 2015
REPOSITORY
rL LLVM
http://reviews.llvm.org/D6949
Files:
llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/trunk/test/CodeGen/X86/vector-blend.ll
llvm/trunk/test/CodeGen/X86/vselect-avx.ll
Index: llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -793,19 +793,26 @@
APInt MsbMask = APInt::getHighBitsSet(BitWidth, 1);
// If we only care about the highest bit, don't bother shifting right.
- if (MsbMask == DemandedMask) {
+ if (MsbMask == NewMask) {
unsigned ShAmt = ExVT.getScalarType().getSizeInBits();
SDValue InOp = Op.getOperand(0);
+ unsigned VTBits = Op->getValueType(0).getScalarType().getSizeInBits();
+ bool AlreadySignExtended =
+ TLO.DAG.ComputeNumSignBits(InOp) >= VTBits-ShAmt+1;
+ // However, if the input is already sign-extended, we expect the sign
+ // extension to be dropped altogether later, so we do not simplify here.
+ if (!AlreadySignExtended) {
+ // Compute the correct shift amount type, which must be getShiftAmountTy
+ // for scalar types after legalization.
+ EVT ShiftAmtTy = Op.getValueType();
+ if (TLO.LegalTypes() && !ShiftAmtTy.isVector())
+ ShiftAmtTy = getShiftAmountTy(ShiftAmtTy);
- // Compute the correct shift amount type, which must be getShiftAmountTy
- // for scalar types after legalization.
- EVT ShiftAmtTy = Op.getValueType();
- if (TLO.LegalTypes() && !ShiftAmtTy.isVector())
- ShiftAmtTy = getShiftAmountTy(ShiftAmtTy);
-
- SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ShAmt, ShiftAmtTy);
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl,
- Op.getValueType(), InOp, ShiftAmt));
+ SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ShAmt, ShiftAmtTy);
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl,
+ Op.getValueType(), InOp,
+ ShiftAmt));
+ }
}
// Sign extension. Compute the demanded bits in the result that are not
Index: llvm/trunk/test/CodeGen/X86/vselect-avx.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/vselect-avx.ll
+++ llvm/trunk/test/CodeGen/X86/vselect-avx.ll
@@ -59,19 +59,15 @@
;
; <rdar://problem/18819506>
-; Note: For now, hard code ORIG_MASK and SHRUNK_MASK registers, because we
-; cannot express that ORIG_MASK must not be equal to ORIG_MASK. Otherwise,
-; even a faulty pattern would pass!
-;
; CHECK-LABEL: test3:
-; Compute the original mask.
-; CHECK: vpcmpeqd {{%xmm[0-9]+}}, {{%xmm[0-9]+}}, [[ORIG_MASK:%xmm0]]
-; Shrink the bit of the mask.
-; CHECK-NEXT: vpslld $31, [[ORIG_MASK]], [[SHRUNK_MASK:%xmm3]]
-; Use the shrunk mask in the blend.
-; CHECK-NEXT: vblendvps [[SHRUNK_MASK]], %xmm{{[0-9]+}}, %xmm{{[0-9]+}}, %xmm{{[0-9]+}}
-; Use the original mask in the and.
-; CHECK-NEXT: vpand LCPI2_2(%rip), [[ORIG_MASK]], {{%xmm[0-9]+}}
+; Compute the mask.
+; CHECK: vpcmpeqd {{%xmm[0-9]+}}, {{%xmm[0-9]+}}, [[MASK:%xmm[0-9]+]]
+; Do not shrink the bit of the mask.
+; CHECK-NOT: vpslld $31, [[MASK]], {{%xmm[0-9]+}}
+; Use the mask in the blend.
+; CHECK-NEXT: vblendvps [[MASK]], %xmm{{[0-9]+}}, %xmm{{[0-9]+}}, %xmm{{[0-9]+}}
+; Use the mask in the and.
+; CHECK-NEXT: vpand LCPI2_2(%rip), [[MASK]], {{%xmm[0-9]+}}
; CHECK: retq
define void @test3(<4 x i32> %induction30, <4 x i16>* %tmp16, <4 x i16>* %tmp17, <4 x i16> %tmp3, <4 x i16> %tmp12) {
%tmp6 = srem <4 x i32> %induction30, <i32 3, i32 3, i32 3, i32 3>
Index: llvm/trunk/test/CodeGen/X86/vector-blend.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/vector-blend.ll
+++ llvm/trunk/test/CodeGen/X86/vector-blend.ll
@@ -419,8 +419,8 @@
;
; SSE41-LABEL: vsel_i648:
; SSE41: # BB#0: # %entry
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm4[4,5,6,7]
-; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm6[4,5,6,7]
+; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm4[1]
+; SSE41-NEXT: blendpd {{.*#+}} xmm2 = xmm2[0],xmm6[1]
; SSE41-NEXT: movaps %xmm5, %xmm1
; SSE41-NEXT: movaps %xmm7, %xmm3
; SSE41-NEXT: retq
EMAIL PREFERENCES
http://reviews.llvm.org/settings/panel/emailpreferences/
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D6949.20154.patch
Type: text/x-patch
Size: 4244 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150218/930254a2/attachment.bin>
More information about the llvm-commits
mailing list