[llvm] 931ec74 - [X86][AVX] Don't fold PEXTR(VBROADCAST_LOAD(X)) -> LOAD(X).

Tue Jul 7 11:14:34 PDT 2020

Author: Simon Pilgrim
Date: 2020-07-07T19:10:03+01:00
New Revision: 931ec74f7a29f53e18b574dc9500012ecbeba23a

URL: https://github.com/llvm/llvm-project/commit/931ec74f7a29f53e18b574dc9500012ecbeba23a
DIFF: https://github.com/llvm/llvm-project/commit/931ec74f7a29f53e18b574dc9500012ecbeba23a.diff

LOG: [X86][AVX] Don't fold PEXTR(VBROADCAST_LOAD(X)) -> LOAD(X).

We were checking the VBROADCAST_LOAD element size against the extraction destination size instead of the extracted vector element size. PEXTRW/PEXTRB implicitly zero-extend their result, so they have i32 destination sizes for v8i16/v16i8 vectors, which resulted in us extracting from the wrong part of a load.

This patch bails from the fold if the vector element sizes don't match, and we now use the target constant extraction code later on like the pre-AVX2 targets, fixing the test case.

Found by internal fuzzing tests.

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/extractelement-load.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 575f358361b1..023b5975f0c7 100644

--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -38986,7 +38986,7 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
     auto *MemIntr = cast<MemIntrinsicSDNode>(SrcBC);
     unsigned SrcBCWidth = SrcBC.getScalarValueSizeInBits();
     if (MemIntr->getMemoryVT().getSizeInBits() == SrcBCWidth &&
-        VT.getSizeInBits() == SrcBCWidth) {
+        VT.getSizeInBits() == SrcBCWidth && SrcEltBits == SrcBCWidth) {
       SDValue Load = DAG.getLoad(VT, dl, MemIntr->getChain(),
                                  MemIntr->getBasePtr(),
                                  MemIntr->getPointerInfo(),

diff  --git a/llvm/test/CodeGen/X86/extractelement-load.ll b/llvm/test/CodeGen/X86/extractelement-load.ll
index 752ba5b2a33d..94628c70d989 100644
--- a/llvm/test/CodeGen/X86/extractelement-load.ll
+++ b/llvm/test/CodeGen/X86/extractelement-load.ll
@@ -267,8 +267,8 @@ entry:
   ret float %cond
 }
 
-; FIXME: Incorrect AVX2 codegen due to bad extraction from a VBROADCAST_LOAD of the <2 x i16> constant bitcast as <4 x i32>.
-define void @subextract_broadcast_load_constant(<2 x i16>* nocapture %0, i16* nocapture %1, i16* nocapture %2)  {
+; Test for bad extractions from a VBROADCAST_LOAD of the <2 x i16> non-uniform constant bitcast as <4 x i32>.
+define void @subextract_broadcast_load_constant(<2 x i16>* nocapture %0, i16* nocapture %1, i16* nocapture %2) {
 ; X32-SSE2-LABEL: subextract_broadcast_load_constant:
 ; X32-SSE2:       # %bb.0:
 ; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -279,26 +279,12 @@ define void @subextract_broadcast_load_constant(<2 x i16>* nocapture %0, i16* no
 ; X32-SSE2-NEXT:    movw $-24160, (%eax) # imm = 0xA1A0
 ; X32-SSE2-NEXT:    retl
 ;
-; X64-SSSE3-LABEL: subextract_broadcast_load_constant:
-; X64-SSSE3:       # %bb.0:
-; X64-SSSE3-NEXT:    movl $-1583308898, (%rdi) # imm = 0xA1A09F9E
-; X64-SSSE3-NEXT:    movw $-24674, (%rsi) # imm = 0x9F9E
-; X64-SSSE3-NEXT:    movw $-24160, (%rdx) # imm = 0xA1A0
-; X64-SSSE3-NEXT:    retq
-;
-; X64-AVX1-LABEL: subextract_broadcast_load_constant:
-; X64-AVX1:       # %bb.0:
-; X64-AVX1-NEXT:    movl $-1583308898, (%rdi) # imm = 0xA1A09F9E
-; X64-AVX1-NEXT:    movw $-24674, (%rsi) # imm = 0x9F9E
-; X64-AVX1-NEXT:    movw $-24160, (%rdx) # imm = 0xA1A0
-; X64-AVX1-NEXT:    retq
-;
-; X64-AVX2-LABEL: subextract_broadcast_load_constant:
-; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    movl $-1583308898, (%rdi) # imm = 0xA1A09F9E
-; X64-AVX2-NEXT:    movw $-24674, (%rsi) # imm = 0x9F9E
-; X64-AVX2-NEXT:    movw $-24674, (%rdx) # imm = 0x9F9E
-; X64-AVX2-NEXT:    retq
+; X64-LABEL: subextract_broadcast_load_constant:
+; X64:       # %bb.0:
+; X64-NEXT:    movl $-1583308898, (%rdi) # imm = 0xA1A09F9E
+; X64-NEXT:    movw $-24674, (%rsi) # imm = 0x9F9E
+; X64-NEXT:    movw $-24160, (%rdx) # imm = 0xA1A0
+; X64-NEXT:    retq
   %4 = bitcast <2 x i16>* %0 to i8*
   store i8 -98, i8* %4, align 1
   %5 = getelementptr inbounds i8, i8* %4, i64 1