[llvm] 6cff71e - [X86][AVX] Add test case showing incorrect extraction from VBROADCAST_LOAD on AVX2 targets

Tue Jul 7 10:33:10 PDT 2020

Author: Simon Pilgrim
Date: 2020-07-07T18:32:32+01:00
New Revision: 6cff71e92e644adf5eab8cb411e5ac053746bbac

URL: https://github.com/llvm/llvm-project/commit/6cff71e92e644adf5eab8cb411e5ac053746bbac
DIFF: https://github.com/llvm/llvm-project/commit/6cff71e92e644adf5eab8cb411e5ac053746bbac.diff

LOG: [X86][AVX] Add test case showing incorrect extraction from VBROADCAST_LOAD on AVX2 targets

On AVX2 we tend to lower BUILD_VECTOR of constants as broadcasts where possible; in this case a non-uniform <2 x i16> constant has been lowered as a <4 x i32> broadcast.

The test case shows that the extraction folding code extracts the wrong part of the resulting i32 memory source: element 1 is read from the lower WORD (0x9F9E) instead of the upper WORD (0xA1A0).

Found by internal fuzzing tests.

Added: 
    

Modified: 
    llvm/test/CodeGen/X86/extractelement-load.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/X86/extractelement-load.ll b/llvm/test/CodeGen/X86/extractelement-load.ll
index 5eb24632e066..752ba5b2a33d 100644

--- a/llvm/test/CodeGen/X86/extractelement-load.ll
+++ b/llvm/test/CodeGen/X86/extractelement-load.ll
@@ -266,3 +266,51 @@ entry:
   %cond = select i1 %cmp, float 1.000000e+00, float %vecext
   ret float %cond
 }
+
+; FIXME: Incorrect AVX2 codegen due to bad extraction from a VBROADCAST_LOAD of the <2 x i16> constant bitcast as <4 x i32>; the AVX2 checks below store 0x9F9E for element 1 where every other target stores 0xA1A0.
+define void @subextract_broadcast_load_constant(<2 x i16>* nocapture %0, i16* nocapture %1, i16* nocapture %2)  {
+; X32-SSE2-LABEL: subextract_broadcast_load_constant:
+; X32-SSE2:       # %bb.0:
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X32-SSE2-NEXT:    movl $-1583308898, (%edx) # imm = 0xA1A09F9E
+; X32-SSE2-NEXT:    movw $-24674, (%ecx) # imm = 0x9F9E
+; X32-SSE2-NEXT:    movw $-24160, (%eax) # imm = 0xA1A0
+; X32-SSE2-NEXT:    retl
+;
+; X64-SSSE3-LABEL: subextract_broadcast_load_constant:
+; X64-SSSE3:       # %bb.0:
+; X64-SSSE3-NEXT:    movl $-1583308898, (%rdi) # imm = 0xA1A09F9E
+; X64-SSSE3-NEXT:    movw $-24674, (%rsi) # imm = 0x9F9E
+; X64-SSSE3-NEXT:    movw $-24160, (%rdx) # imm = 0xA1A0
+; X64-SSSE3-NEXT:    retq
+;
+; X64-AVX1-LABEL: subextract_broadcast_load_constant:
+; X64-AVX1:       # %bb.0:
+; X64-AVX1-NEXT:    movl $-1583308898, (%rdi) # imm = 0xA1A09F9E
+; X64-AVX1-NEXT:    movw $-24674, (%rsi) # imm = 0x9F9E
+; X64-AVX1-NEXT:    movw $-24160, (%rdx) # imm = 0xA1A0
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX2-LABEL: subextract_broadcast_load_constant:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    movl $-1583308898, (%rdi) # imm = 0xA1A09F9E
+; X64-AVX2-NEXT:    movw $-24674, (%rsi) # imm = 0x9F9E
+; X64-AVX2-NEXT:    movw $-24674, (%rdx) # imm = 0x9F9E
+; X64-AVX2-NEXT:    retq
+  %4 = bitcast <2 x i16>* %0 to i8* ; address the <2 x i16> slot byte by byte
+  store i8 -98, i8* %4, align 1 ; byte 0 = 0x9E
+  %5 = getelementptr inbounds i8, i8* %4, i64 1
+  store i8 -97, i8* %5, align 1 ; byte 1 = 0x9F
+  %6 = getelementptr inbounds i8, i8* %4, i64 2
+  store i8 -96, i8* %6, align 1 ; byte 2 = 0xA0
+  %7 = getelementptr inbounds i8, i8* %4, i64 3
+  store i8 -95, i8* %7, align 1 ; byte 3 = 0xA1
+  %8 = load <2 x i16>, <2 x i16>* %0, align 4 ; reload the four bytes just written as one <2 x i16> value
+  %9 = extractelement <2 x i16> %8, i32 0 ; element 0, stored through %1 (0x9F9E in all check blocks)
+  store i16 %9, i16* %1, align 2
+  %10 = extractelement <2 x i16> %8, i32 1 ; element 1, stored through %2 (0xA1A0 except in the AVX2 checks, see FIXME)
+  store i16 %10, i16* %2, align 2
+  ret void
+}