[llvm] 5340434 - [X86][SSE] combineExtractWithShuffle - extract(bitcast(broadcast(x))) --> x
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 22 10:03:14 PST 2020
Author: Simon Pilgrim
Date: 2020-01-22T18:02:58Z
New Revision: 5340434c94c4c48abf755d12b2c3a19ca46fabaa
URL: https://github.com/llvm/llvm-project/commit/5340434c94c4c48abf755d12b2c3a19ca46fabaa
DIFF: https://github.com/llvm/llvm-project/commit/5340434c94c4c48abf755d12b2c3a19ca46fabaa.diff
LOG: [X86][SSE] combineExtractWithShuffle - extract(bitcast(broadcast(x))) --> x
Removes some unnecessary GPR<-->FPU traffic (e.g. a vpbroadcastq followed by a vpextrb of the same scalar value).
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/bitcast-vector-bool.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 54613776a3e4..64cc73eec392 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -37102,11 +37102,24 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
SDValue SrcBC = peekThroughBitcasts(Src);
- // Handle extract(broadcast(scalar_value)), it doesn't matter what index is.
+ // Handle extract(bitcast(broadcast(scalar_value))).
if (X86ISD::VBROADCAST == SrcBC.getOpcode()) {
SDValue SrcOp = SrcBC.getOperand(0);
if (SrcOp.getValueSizeInBits() == VT.getSizeInBits())
return DAG.getBitcast(VT, SrcOp);
+
+ EVT SrcOpVT = SrcOp.getValueType();
+ if (SrcOpVT.isScalarInteger() && VT.isInteger() &&
+ (SrcOpVT.getSizeInBits() % SrcSVT.getSizeInBits()) == 0) {
+ unsigned Scale = SrcOpVT.getSizeInBits() / SrcSVT.getSizeInBits();
+ unsigned Offset = IdxC.urem(Scale) * SrcSVT.getSizeInBits();
+ // TODO support non-zero offsets.
+ if (Offset == 0) {
+ SrcOp = DAG.getZExtOrTrunc(SrcOp, dl, SrcVT.getScalarType());
+ SrcOp = DAG.getZExtOrTrunc(SrcOp, dl, VT);
+ return SrcOp;
+ }
+ }
}
// If we're extracting a single element from a broadcast load and there are
@@ -37126,7 +37139,7 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
}
}
- // Handle extract(scalar_to_vector(scalar_value)) for integers.
+ // Handle extract(bitcast(scalar_to_vector(scalar_value))) for integers.
// TODO: Move to DAGCombine?
if (SrcBC.getOpcode() == ISD::SCALAR_TO_VECTOR && VT.isInteger() &&
SrcBC.getValueType().isInteger() &&
diff --git a/llvm/test/CodeGen/X86/bitcast-vector-bool.ll b/llvm/test/CodeGen/X86/bitcast-vector-bool.ll
index 8dde976912b0..8afff857cdad 100644
--- a/llvm/test/CodeGen/X86/bitcast-vector-bool.ll
+++ b/llvm/test/CodeGen/X86/bitcast-vector-bool.ll
@@ -77,9 +77,7 @@ define i2 @bitcast_v4i32_to_v2i2(<4 x i32> %a0) nounwind {
; AVX512-NEXT: movzbl %cl, %eax
; AVX512-NEXT: shrl $2, %eax
; AVX512-NEXT: andl $3, %eax
-; AVX512-NEXT: vpbroadcastq %rax, %xmm0
; AVX512-NEXT: andl $3, %ecx
-; AVX512-NEXT: vpextrb $8, %xmm0, %eax
; AVX512-NEXT: addb %cl, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
@@ -124,9 +122,7 @@ define i4 @bitcast_v8i16_to_v2i4(<8 x i16> %a0) nounwind {
; AVX512-NEXT: kmovd %k0, %ecx
; AVX512-NEXT: movzbl %cl, %eax
; AVX512-NEXT: shrl $4, %eax
-; AVX512-NEXT: vpbroadcastq %rax, %xmm0
; AVX512-NEXT: andl $15, %ecx
-; AVX512-NEXT: vpextrb $8, %xmm0, %eax
; AVX512-NEXT: addb %cl, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
@@ -214,9 +210,7 @@ define i2 @bitcast_v4i64_to_v2i2(<4 x i64> %a0) nounwind {
; AVX512-NEXT: movzbl %cl, %eax
; AVX512-NEXT: shrl $2, %eax
; AVX512-NEXT: andl $3, %eax
-; AVX512-NEXT: vpbroadcastq %rax, %xmm0
; AVX512-NEXT: andl $3, %ecx
-; AVX512-NEXT: vpextrb $8, %xmm0, %eax
; AVX512-NEXT: addb %cl, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
@@ -264,9 +258,7 @@ define i4 @bitcast_v8i32_to_v2i4(<8 x i32> %a0) nounwind {
; AVX512-NEXT: kmovd %k0, %ecx
; AVX512-NEXT: movzbl %cl, %eax
; AVX512-NEXT: shrl $4, %eax
-; AVX512-NEXT: vpbroadcastq %rax, %xmm0
; AVX512-NEXT: andl $15, %ecx
-; AVX512-NEXT: vpextrb $8, %xmm0, %eax
; AVX512-NEXT: addb %cl, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
@@ -451,9 +443,7 @@ define i4 @bitcast_v8i64_to_v2i4(<8 x i64> %a0) nounwind {
; AVX512-NEXT: kmovd %k0, %ecx
; AVX512-NEXT: movzbl %cl, %eax
; AVX512-NEXT: shrl $4, %eax
-; AVX512-NEXT: vpbroadcastq %rax, %xmm0
; AVX512-NEXT: andl $15, %ecx
-; AVX512-NEXT: vpextrb $8, %xmm0, %eax
; AVX512-NEXT: addb %cl, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
More information about the llvm-commits mailing list