[llvm] 931ec74 - [X86][AVX] Don't fold PEXTR(VBROADCAST_LOAD(X)) -> LOAD(X).
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 7 11:14:34 PDT 2020
Author: Simon Pilgrim
Date: 2020-07-07T19:10:03+01:00
New Revision: 931ec74f7a29f53e18b574dc9500012ecbeba23a
URL: https://github.com/llvm/llvm-project/commit/931ec74f7a29f53e18b574dc9500012ecbeba23a
DIFF: https://github.com/llvm/llvm-project/commit/931ec74f7a29f53e18b574dc9500012ecbeba23a.diff
LOG: [X86][AVX] Don't fold PEXTR(VBROADCAST_LOAD(X)) -> LOAD(X).
We were checking the VBROADCAST_LOAD element size against the extraction destination size instead of the extracted vector element size. PEXTRW/PEXTRB implicitly zero-extend their result, so they have i32 destination sizes for v8i16/v16i8 vectors, which resulted in us extracting from the wrong part of the load.
This patch bails from the fold if the vector element sizes don't match, and we now use the target constant extraction code later on like the pre-AVX2 targets, fixing the test case.
Found by internal fuzzing tests.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/extractelement-load.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 575f358361b1..023b5975f0c7 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -38986,7 +38986,7 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
auto *MemIntr = cast<MemIntrinsicSDNode>(SrcBC);
unsigned SrcBCWidth = SrcBC.getScalarValueSizeInBits();
if (MemIntr->getMemoryVT().getSizeInBits() == SrcBCWidth &&
- VT.getSizeInBits() == SrcBCWidth) {
+ VT.getSizeInBits() == SrcBCWidth && SrcEltBits == SrcBCWidth) {
SDValue Load = DAG.getLoad(VT, dl, MemIntr->getChain(),
MemIntr->getBasePtr(),
MemIntr->getPointerInfo(),
diff --git a/llvm/test/CodeGen/X86/extractelement-load.ll b/llvm/test/CodeGen/X86/extractelement-load.ll
index 752ba5b2a33d..94628c70d989 100644
--- a/llvm/test/CodeGen/X86/extractelement-load.ll
+++ b/llvm/test/CodeGen/X86/extractelement-load.ll
@@ -267,8 +267,8 @@ entry:
ret float %cond
}
-; FIXME: Incorrect AVX2 codegen due to bad extraction from a VBROADCAST_LOAD of the <2 x i16> constant bitcast as <4 x i32>.
-define void @subextract_broadcast_load_constant(<2 x i16>* nocapture %0, i16* nocapture %1, i16* nocapture %2) {
+; Test for bad extractions from a VBROADCAST_LOAD of the <2 x i16> non-uniform constant bitcast as <4 x i32>.
+define void @subextract_broadcast_load_constant(<2 x i16>* nocapture %0, i16* nocapture %1, i16* nocapture %2) {
; X32-SSE2-LABEL: subextract_broadcast_load_constant:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -279,26 +279,12 @@ define void @subextract_broadcast_load_constant(<2 x i16>* nocapture %0, i16* no
; X32-SSE2-NEXT: movw $-24160, (%eax) # imm = 0xA1A0
; X32-SSE2-NEXT: retl
;
-; X64-SSSE3-LABEL: subextract_broadcast_load_constant:
-; X64-SSSE3: # %bb.0:
-; X64-SSSE3-NEXT: movl $-1583308898, (%rdi) # imm = 0xA1A09F9E
-; X64-SSSE3-NEXT: movw $-24674, (%rsi) # imm = 0x9F9E
-; X64-SSSE3-NEXT: movw $-24160, (%rdx) # imm = 0xA1A0
-; X64-SSSE3-NEXT: retq
-;
-; X64-AVX1-LABEL: subextract_broadcast_load_constant:
-; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: movl $-1583308898, (%rdi) # imm = 0xA1A09F9E
-; X64-AVX1-NEXT: movw $-24674, (%rsi) # imm = 0x9F9E
-; X64-AVX1-NEXT: movw $-24160, (%rdx) # imm = 0xA1A0
-; X64-AVX1-NEXT: retq
-;
-; X64-AVX2-LABEL: subextract_broadcast_load_constant:
-; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: movl $-1583308898, (%rdi) # imm = 0xA1A09F9E
-; X64-AVX2-NEXT: movw $-24674, (%rsi) # imm = 0x9F9E
-; X64-AVX2-NEXT: movw $-24674, (%rdx) # imm = 0x9F9E
-; X64-AVX2-NEXT: retq
+; X64-LABEL: subextract_broadcast_load_constant:
+; X64: # %bb.0:
+; X64-NEXT: movl $-1583308898, (%rdi) # imm = 0xA1A09F9E
+; X64-NEXT: movw $-24674, (%rsi) # imm = 0x9F9E
+; X64-NEXT: movw $-24160, (%rdx) # imm = 0xA1A0
+; X64-NEXT: retq
%4 = bitcast <2 x i16>* %0 to i8*
store i8 -98, i8* %4, align 1
%5 = getelementptr inbounds i8, i8* %4, i64 1
More information about the llvm-commits mailing list