[llvm] r362486 - [SelectionDAG] ComputeNumSignBits - support constant pool values from target
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 4 03:49:06 PDT 2019
Author: rksimon
Date: Tue Jun 4 03:49:06 2019
New Revision: 362486
URL: http://llvm.org/viewvc/llvm-project?rev=362486&view=rev
Log:
[SelectionDAG] ComputeNumSignBits - support constant pool values from target
As I mentioned on D61887 we don't get many hits on ComputeNumSignBits as we did on computeKnownBits.
The case we do get is interesting though - it allows us to use the 'ConditionalNegate' combine in combineLogicBlendIntoPBLENDV to remove a select.
It comes too late for SSE41 (BLENDV) cases, but SSE2 tests can hit it now. We should probably try to make use of this for SSE41+ targets as well - avoiding variable blends is usually a good idea. I'll investigate as a followup.
Differential Revision: https://reviews.llvm.org/D62777
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
llvm/trunk/test/CodeGen/X86/combine-sdiv.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp?rev=362486&r1=362485&r2=362486&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Tue Jun 4 03:49:06 2019
@@ -3885,6 +3885,36 @@ unsigned SelectionDAG::ComputeNumSignBit
case ISD::ZEXTLOAD: // e.g. i16->i32 = '16' bits known.
Tmp = LD->getMemoryVT().getScalarSizeInBits();
return VTBits - Tmp;
+ case ISD::NON_EXTLOAD:
+ if (const Constant *Cst = TLI->getTargetConstantFromLoad(LD)) {
+ // We only need to handle vectors - computeKnownBits should handle
+ // scalar cases.
+ Type *CstTy = Cst->getType();
+ if (CstTy->isVectorTy() &&
+ (NumElts * VTBits) == CstTy->getPrimitiveSizeInBits()) {
+ Tmp = VTBits;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (!DemandedElts[i])
+ continue;
+ if (Constant *Elt = Cst->getAggregateElement(i)) {
+ if (auto *CInt = dyn_cast<ConstantInt>(Elt)) {
+ const APInt &Value = CInt->getValue();
+ Tmp = std::min(Tmp, Value.getNumSignBits());
+ continue;
+ }
+ if (auto *CFP = dyn_cast<ConstantFP>(Elt)) {
+ APInt Value = CFP->getValueAPF().bitcastToAPInt();
+ Tmp = std::min(Tmp, Value.getNumSignBits());
+ continue;
+ }
+ }
+ // Unknown type. Conservatively assume no bits match sign bit.
+ return 1;
+ }
+ return Tmp;
+ }
+ }
+ break;
}
}
}
Modified: llvm/trunk/test/CodeGen/X86/combine-sdiv.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/combine-sdiv.ll?rev=362486&r1=362485&r2=362486&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/combine-sdiv.ll (original)
+++ llvm/trunk/test/CodeGen/X86/combine-sdiv.ll Tue Jun 4 03:49:06 2019
@@ -2112,11 +2112,9 @@ define <16 x i8> @non_splat_minus_one_di
; SSE2-LABEL: non_splat_minus_one_divisor_0:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,0,255,0,0,0,255,0,0,255,255,255,255,255,255,255]
-; SSE2-NEXT: pxor %xmm2, %xmm2
-; SSE2-NEXT: psubb %xmm0, %xmm2
-; SSE2-NEXT: pand %xmm1, %xmm0
-; SSE2-NEXT: pandn %xmm2, %xmm1
-; SSE2-NEXT: por %xmm1, %xmm0
+; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: psubb %xmm0, %xmm1
+; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: non_splat_minus_one_divisor_0:
@@ -2175,38 +2173,36 @@ define <16 x i8> @non_splat_minus_one_di
define <16 x i8> @non_splat_minus_one_divisor_1(<16 x i8> %A) {
; SSE2-LABEL: non_splat_minus_one_divisor_1:
; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: pxor %xmm0, %xmm0
+; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
-; SSE2-NEXT: pcmpgtb %xmm1, %xmm2
+; SSE2-NEXT: pcmpgtb %xmm0, %xmm2
; SSE2-NEXT: movdqa %xmm2, %xmm3
-; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm0[8],xmm3[9],xmm0[9],xmm3[10],xmm0[10],xmm3[11],xmm0[11],xmm3[12],xmm0[12],xmm3[13],xmm0[13],xmm3[14],xmm0[14],xmm3[15],xmm0[15]
+; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm1[8],xmm3[9],xmm1[9],xmm3[10],xmm1[10],xmm3[11],xmm1[11],xmm3[12],xmm1[12],xmm3[13],xmm1[13],xmm3[14],xmm1[14],xmm3[15],xmm1[15]
; SSE2-NEXT: pmullw {{.*}}(%rip), %xmm3
; SSE2-NEXT: psrlw $8, %xmm3
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
; SSE2-NEXT: pmullw {{.*}}(%rip), %xmm2
; SSE2-NEXT: psrlw $8, %xmm2
; SSE2-NEXT: packuswb %xmm3, %xmm2
-; SSE2-NEXT: paddb %xmm1, %xmm2
-; SSE2-NEXT: movdqa %xmm2, %xmm3
-; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15]
-; SSE2-NEXT: psraw $8, %xmm3
-; SSE2-NEXT: pmullw {{.*}}(%rip), %xmm3
-; SSE2-NEXT: psrlw $8, %xmm3
+; SSE2-NEXT: paddb %xmm0, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm1
+; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
+; SSE2-NEXT: psraw $8, %xmm1
+; SSE2-NEXT: pmullw {{.*}}(%rip), %xmm1
+; SSE2-NEXT: psrlw $8, %xmm1
; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: psraw $8, %xmm2
; SSE2-NEXT: pmullw {{.*}}(%rip), %xmm2
; SSE2-NEXT: psrlw $8, %xmm2
-; SSE2-NEXT: packuswb %xmm3, %xmm2
-; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [0,0,255,0,0,0,255,0,0,255,255,255,255,255,255,255]
-; SSE2-NEXT: pand %xmm3, %xmm2
-; SSE2-NEXT: pandn %xmm1, %xmm3
-; SSE2-NEXT: por %xmm2, %xmm3
-; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [255,255,0,255,255,255,0,255,255,0,0,0,0,255,0,255]
-; SSE2-NEXT: psubb %xmm3, %xmm0
-; SSE2-NEXT: pand %xmm1, %xmm0
-; SSE2-NEXT: pandn %xmm3, %xmm1
-; SSE2-NEXT: por %xmm1, %xmm0
+; SSE2-NEXT: packuswb %xmm1, %xmm2
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,0,255,0,0,0,255,0,0,255,255,255,255,255,255,255]
+; SSE2-NEXT: pand %xmm1, %xmm2
+; SSE2-NEXT: pandn %xmm0, %xmm1
+; SSE2-NEXT: por %xmm2, %xmm1
+; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [255,255,0,255,255,255,0,255,255,0,0,0,0,255,0,255]
+; SSE2-NEXT: pxor %xmm0, %xmm1
+; SSE2-NEXT: psubb %xmm0, %xmm1
+; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: non_splat_minus_one_divisor_1:
More information about the llvm-commits
mailing list