[llvm] 5340434 - [X86][SSE] combineExtractWithShuffle - extract(bitcast(broadcast(x))) --> x

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 22 10:03:14 PST 2020


Author: Simon Pilgrim
Date: 2020-01-22T18:02:58Z
New Revision: 5340434c94c4c48abf755d12b2c3a19ca46fabaa

URL: https://github.com/llvm/llvm-project/commit/5340434c94c4c48abf755d12b2c3a19ca46fabaa
DIFF: https://github.com/llvm/llvm-project/commit/5340434c94c4c48abf755d12b2c3a19ca46fabaa.diff

LOG: [X86][SSE] combineExtractWithShuffle - extract(bitcast(broadcast(x))) --> x

Removes some unnecessary GPR<-->FPU traffic (e.g. a scalar broadcast into a vector register followed immediately by an extract back to a GPR).

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/bitcast-vector-bool.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 54613776a3e4..64cc73eec392 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -37102,11 +37102,24 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
 
   SDValue SrcBC = peekThroughBitcasts(Src);
 
-  // Handle extract(broadcast(scalar_value)), it doesn't matter what index is.
+  // Handle extract(bitcast(broadcast(scalar_value))).
   if (X86ISD::VBROADCAST == SrcBC.getOpcode()) {
     SDValue SrcOp = SrcBC.getOperand(0);
     if (SrcOp.getValueSizeInBits() == VT.getSizeInBits())
       return DAG.getBitcast(VT, SrcOp);
+
+    EVT SrcOpVT = SrcOp.getValueType();
+    if (SrcOpVT.isScalarInteger() && VT.isInteger() &&
+        (SrcOpVT.getSizeInBits() % SrcSVT.getSizeInBits()) == 0) {
+      unsigned Scale = SrcOpVT.getSizeInBits() / SrcSVT.getSizeInBits();
+      unsigned Offset = IdxC.urem(Scale) * SrcSVT.getSizeInBits();
+      // TODO support non-zero offsets.
+      if (Offset == 0) {
+        SrcOp = DAG.getZExtOrTrunc(SrcOp, dl, SrcVT.getScalarType());
+        SrcOp = DAG.getZExtOrTrunc(SrcOp, dl, VT);
+        return SrcOp;
+      }
+    }
   }
 
   // If we're extracting a single element from a broadcast load and there are
@@ -37126,7 +37139,7 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
     }
   }
 
-  // Handle extract(scalar_to_vector(scalar_value)) for integers.
+  // Handle extract(bitcast(scalar_to_vector(scalar_value))) for integers.
   // TODO: Move to DAGCombine?
   if (SrcBC.getOpcode() == ISD::SCALAR_TO_VECTOR && VT.isInteger() &&
       SrcBC.getValueType().isInteger() &&

diff  --git a/llvm/test/CodeGen/X86/bitcast-vector-bool.ll b/llvm/test/CodeGen/X86/bitcast-vector-bool.ll
index 8dde976912b0..8afff857cdad 100644
--- a/llvm/test/CodeGen/X86/bitcast-vector-bool.ll
+++ b/llvm/test/CodeGen/X86/bitcast-vector-bool.ll
@@ -77,9 +77,7 @@ define i2 @bitcast_v4i32_to_v2i2(<4 x i32> %a0) nounwind {
 ; AVX512-NEXT:    movzbl %cl, %eax
 ; AVX512-NEXT:    shrl $2, %eax
 ; AVX512-NEXT:    andl $3, %eax
-; AVX512-NEXT:    vpbroadcastq %rax, %xmm0
 ; AVX512-NEXT:    andl $3, %ecx
-; AVX512-NEXT:    vpextrb $8, %xmm0, %eax
 ; AVX512-NEXT:    addb %cl, %al
 ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512-NEXT:    retq
@@ -124,9 +122,7 @@ define i4 @bitcast_v8i16_to_v2i4(<8 x i16> %a0) nounwind {
 ; AVX512-NEXT:    kmovd %k0, %ecx
 ; AVX512-NEXT:    movzbl %cl, %eax
 ; AVX512-NEXT:    shrl $4, %eax
-; AVX512-NEXT:    vpbroadcastq %rax, %xmm0
 ; AVX512-NEXT:    andl $15, %ecx
-; AVX512-NEXT:    vpextrb $8, %xmm0, %eax
 ; AVX512-NEXT:    addb %cl, %al
 ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512-NEXT:    retq
@@ -214,9 +210,7 @@ define i2 @bitcast_v4i64_to_v2i2(<4 x i64> %a0) nounwind {
 ; AVX512-NEXT:    movzbl %cl, %eax
 ; AVX512-NEXT:    shrl $2, %eax
 ; AVX512-NEXT:    andl $3, %eax
-; AVX512-NEXT:    vpbroadcastq %rax, %xmm0
 ; AVX512-NEXT:    andl $3, %ecx
-; AVX512-NEXT:    vpextrb $8, %xmm0, %eax
 ; AVX512-NEXT:    addb %cl, %al
 ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512-NEXT:    vzeroupper
@@ -264,9 +258,7 @@ define i4 @bitcast_v8i32_to_v2i4(<8 x i32> %a0) nounwind {
 ; AVX512-NEXT:    kmovd %k0, %ecx
 ; AVX512-NEXT:    movzbl %cl, %eax
 ; AVX512-NEXT:    shrl $4, %eax
-; AVX512-NEXT:    vpbroadcastq %rax, %xmm0
 ; AVX512-NEXT:    andl $15, %ecx
-; AVX512-NEXT:    vpextrb $8, %xmm0, %eax
 ; AVX512-NEXT:    addb %cl, %al
 ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512-NEXT:    vzeroupper
@@ -451,9 +443,7 @@ define i4 @bitcast_v8i64_to_v2i4(<8 x i64> %a0) nounwind {
 ; AVX512-NEXT:    kmovd %k0, %ecx
 ; AVX512-NEXT:    movzbl %cl, %eax
 ; AVX512-NEXT:    shrl $4, %eax
-; AVX512-NEXT:    vpbroadcastq %rax, %xmm0
 ; AVX512-NEXT:    andl $15, %ecx
-; AVX512-NEXT:    vpextrb $8, %xmm0, %eax
 ; AVX512-NEXT:    addb %cl, %al
 ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512-NEXT:    vzeroupper


        


More information about the llvm-commits mailing list