[llvm] r288499 - [X86][SSE] Add support for extracting constant bit data from broadcasted constants
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 2 05:16:08 PST 2016
Author: rksimon
Date: Fri Dec 2 07:16:08 2016
New Revision: 288499
URL: http://llvm.org/viewvc/llvm-project?rev=288499&view=rev
Log:
[X86][SSE] Add support for extracting constant bit data from broadcasted constants
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/clear_upper_vector_element_bits.ll
llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll
llvm/trunk/test/CodeGen/X86/vec_uint_to_fp-fastmath.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=288499&r1=288498&r2=288499&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Dec 2 07:16:08 2016
@@ -5138,6 +5138,8 @@ static bool getTargetConstantBitsFromNod
assert(UndefElts.empty() && "Expected an empty UndefElts vector");
assert(EltBits.empty() && "Expected an empty EltBits vector");
+ Op = peekThroughBitcasts(Op);
+
EVT VT = Op.getValueType();
unsigned SizeInBits = VT.getSizeInBits();
assert((SizeInBits % EltSizeInBits) == 0 && "Can't split constant!");
@@ -5170,35 +5172,35 @@ static bool getTargetConstantBitsFromNod
return true;
};
- // Extract constant bits from constant pool scalar/vector.
+ auto ExtractConstantBits = [SizeInBits](const Constant *Cst, APInt &Mask,
+ APInt &Undefs) {
+ if (!Cst)
+ return false;
+ unsigned CstSizeInBits = Cst->getType()->getPrimitiveSizeInBits();
+ if (isa<UndefValue>(Cst)) {
+ Mask = APInt::getNullValue(SizeInBits);
+ Undefs = APInt::getLowBitsSet(SizeInBits, CstSizeInBits);
+ return true;
+ }
+ if (auto *CInt = dyn_cast<ConstantInt>(Cst)) {
+ Mask = CInt->getValue().zextOrTrunc(SizeInBits);
+ Undefs = APInt::getNullValue(SizeInBits);
+ return true;
+ }
+ if (auto *CFP = dyn_cast<ConstantFP>(Cst)) {
+ Mask = CFP->getValueAPF().bitcastToAPInt().zextOrTrunc(SizeInBits);
+ Undefs = APInt::getNullValue(SizeInBits);
+ return true;
+ }
+ return false;
+ };
+
+ // Extract constant bits from constant pool vector.
if (auto *Cst = getTargetConstantFromNode(Op)) {
Type *CstTy = Cst->getType();
if (!CstTy->isVectorTy() || (SizeInBits != CstTy->getPrimitiveSizeInBits()))
return false;
- auto ExtractConstantBits = [SizeInBits](const Constant *Cst, APInt &Mask,
- APInt &Undefs) {
- if (!Cst)
- return false;
- unsigned CstSizeInBits = Cst->getType()->getPrimitiveSizeInBits();
- if (isa<UndefValue>(Cst)) {
- Mask = APInt::getNullValue(SizeInBits);
- Undefs = APInt::getLowBitsSet(SizeInBits, CstSizeInBits);
- return true;
- }
- if (auto *CInt = dyn_cast<ConstantInt>(Cst)) {
- Mask = CInt->getValue().zextOrTrunc(SizeInBits);
- Undefs = APInt::getNullValue(SizeInBits);
- return true;
- }
- if (auto *CFP = dyn_cast<ConstantFP>(Cst)) {
- Mask = CFP->getValueAPF().bitcastToAPInt().zextOrTrunc(SizeInBits);
- Undefs = APInt::getNullValue(SizeInBits);
- return true;
- }
- return false;
- };
-
unsigned CstEltSizeInBits = CstTy->getScalarSizeInBits();
for (unsigned i = 0, e = CstTy->getVectorNumElements(); i != e; ++i) {
APInt Bits, Undefs;
@@ -5211,9 +5213,27 @@ static bool getTargetConstantBitsFromNod
return SplitBitData();
}
+ // Extract constant bits from a broadcasted constant pool scalar.
+ if (Op.getOpcode() == X86ISD::VBROADCAST &&
+ EltSizeInBits <= Op.getScalarValueSizeInBits()) {
+ if (auto *Broadcast = getTargetConstantFromNode(Op.getOperand(0))) {
+ APInt Bits, Undefs;
+ if (ExtractConstantBits(Broadcast, Bits, Undefs)) {
+ unsigned NumBroadcastBits = Op.getScalarValueSizeInBits();
+ unsigned NumBroadcastElts = SizeInBits / NumBroadcastBits;
+ for (unsigned i = 0; i != NumBroadcastElts; ++i) {
+ MaskBits |= Bits.shl(i * NumBroadcastBits);
+ UndefBits |= Undefs.shl(i * NumBroadcastBits);
+ }
+ return SplitBitData();
+ }
+ }
+ }
+
return false;
}
+// TODO: Merge more of this with getTargetConstantBitsFromNode.
static bool getTargetShuffleMaskIndices(SDValue MaskNode,
unsigned MaskEltSizeInBits,
SmallVectorImpl<uint64_t> &RawMask) {
Modified: llvm/trunk/test/CodeGen/X86/clear_upper_vector_element_bits.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/clear_upper_vector_element_bits.ll?rev=288499&r1=288498&r2=288499&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/clear_upper_vector_element_bits.ll (original)
+++ llvm/trunk/test/CodeGen/X86/clear_upper_vector_element_bits.ll Fri Dec 2 07:16:08 2016
@@ -41,17 +41,11 @@ define <4 x i32> @_clearupper4xi32a(<4 x
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
-; AVX1-LABEL: _clearupper4xi32a:
-; AVX1: # BB#0:
-; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: _clearupper4xi32a:
-; AVX2: # BB#0:
-; AVX2-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
-; AVX2-NEXT: vandps %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: retq
+; AVX-LABEL: _clearupper4xi32a:
+; AVX: # BB#0:
+; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
+; AVX-NEXT: retq
%x0 = extractelement <4 x i32> %0, i32 0
%x1 = extractelement <4 x i32> %0, i32 1
%x2 = extractelement <4 x i32> %0, i32 2
Modified: llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll?rev=288499&r1=288498&r2=288499&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll Fri Dec 2 07:16:08 2016
@@ -590,8 +590,8 @@ define <2 x double> @uitofp_4i32_to_2f64
; AVX2-NEXT: vcvtdq2pd %xmm1, %ymm1
; AVX2-NEXT: vbroadcastsd {{.*}}(%rip), %ymm2
; AVX2-NEXT: vmulpd %ymm2, %ymm1, %ymm1
-; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %xmm2
-; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vxorpd %xmm2, %xmm2, %xmm2
+; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX2-NEXT: vaddpd %ymm0, %ymm1, %ymm0
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
@@ -889,8 +889,8 @@ define <4 x double> @uitofp_4i32_to_4f64
; AVX2-NEXT: vcvtdq2pd %xmm1, %ymm1
; AVX2-NEXT: vbroadcastsd {{.*}}(%rip), %ymm2
; AVX2-NEXT: vmulpd %ymm2, %ymm1, %ymm1
-; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %xmm2
-; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vxorpd %xmm2, %xmm2, %xmm2
+; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX2-NEXT: vaddpd %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
@@ -3274,8 +3274,8 @@ define <4 x double> @uitofp_load_4i32_to
; AVX2-NEXT: vcvtdq2pd %xmm1, %ymm1
; AVX2-NEXT: vbroadcastsd {{.*}}(%rip), %ymm2
; AVX2-NEXT: vmulpd %ymm2, %ymm1, %ymm1
-; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %xmm2
-; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vxorpd %xmm2, %xmm2, %xmm2
+; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX2-NEXT: vaddpd %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
Modified: llvm/trunk/test/CodeGen/X86/vec_uint_to_fp-fastmath.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_uint_to_fp-fastmath.ll?rev=288499&r1=288498&r2=288499&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_uint_to_fp-fastmath.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_uint_to_fp-fastmath.ll Fri Dec 2 07:16:08 2016
@@ -26,9 +26,6 @@
; AVX2: [[FPMASKCSTADDR:.LCPI[0-9_]+]]:
; AVX2-NEXT: .long 1199570944 # float 65536
-; AVX2: [[MASKCSTADDR:.LCPI[0-9_]+]]:
-; AVX2-NEXT: .long 65535 # 0xffff
-
define <4 x float> @test_uitofp_v4i32_to_v4f32(<4 x i32> %arg) {
; SSE2-LABEL: test_uitofp_v4i32_to_v4f32:
; SSE2: # BB#0:
@@ -69,8 +66,8 @@ define <4 x float> @test_uitofp_v4i32_to
; AVX2-NEXT: vcvtdq2ps %xmm1, %xmm1
; AVX2-NEXT: vbroadcastss [[FPMASKCSTADDR]](%rip), %xmm2
; AVX2-NEXT: vmulps %xmm2, %xmm1, %xmm1
-; AVX2-NEXT: vpbroadcastd [[MASKCSTADDR]](%rip), %xmm2
-; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
; AVX2-NEXT: vcvtdq2ps %xmm0, %xmm0
; AVX2-NEXT: vaddps %xmm0, %xmm1, %xmm0
; AVX2-NEXT: retq
More information about the llvm-commits
mailing list