[llvm] [X86][FP16][BF16] Improve vectorization of fcmp (PR #116153)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 9 11:25:26 PST 2024
bgra8 wrote:
@phoebewang here's the reproducer for the crash:
repro.ll:
```
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-generic-linux-gnu"
; Reduced reproducer: a vector fcmp on <16 x half> whose sign-extended result
; is narrowed, packed and stored. Attribute group #0 enables AVX-512F and F16C
; but does not list avx512fp16.
define void @_test_func(<16 x half> %0) #0 {
; Per-lane ordered compare against zero: true for lanes that are not NaN.
%2 = fcmp ord <16 x half> %0, zeroinitializer
; Widen each i1 lane to i32 (all-ones for true, zero for false).
%3 = sext <16 x i1> %2 to <16 x i32>
; Keep only lanes 0-3 of the sign-extended mask.
%4 = shufflevector <16 x i32> %3, <16 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Narrow the four i32 lanes to i16 via the SSE2 packssdw intrinsic; the second
; operand is all zeros.
%5 = tail call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %4, <4 x i32> zeroinitializer)
; Widen to 16 lanes: indices 0-7 select %5, indices 8-15 select the zero vector.
%6 = shufflevector <8 x i16> %5, <8 x i16> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; Reinterpret the 256-bit value as bytes.
%7 = bitcast <16 x i16> %6 to <32 x i8>
; NOTE(review): the store target is `ptr null` — presumably an artifact of test
; reduction that only serves to keep the computation live; confirm.
store <32 x i8> %7, ptr null, align 1
ret void
}
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) #1
attributes #0 = { "target-features"="+aes,+avx,+avx2,+avx512f,+avx512vnni,+cmov,+crc32,+cx16,+cx8,+evex512,+f16c,+fma,+fxsr,+mmx,+pclmul,+popcnt,+prfchw,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" }
attributes #1 = { nocallback nofree nosync nounwind willreturn memory(none) }
```
Repro command:
```
$ llc < repro.ll
```
With this change applied, `llc` crashes on the reproducer above; at the immediately preceding revision it compiles successfully.
https://github.com/llvm/llvm-project/pull/116153
More information about the llvm-commits
mailing list