[llvm] bdd3559 - [X86] pmulh.ll - add extra test coverage from #109790
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 30 04:40:57 PDT 2024
Author: Simon Pilgrim
Date: 2024-09-30T12:12:32+01:00
New Revision: bdd35593059441dd626305ee48a91d19b952189c
URL: https://github.com/llvm/llvm-project/commit/bdd35593059441dd626305ee48a91d19b952189c
DIFF: https://github.com/llvm/llvm-project/commit/bdd35593059441dd626305ee48a91d19b952189c.diff
LOG: [X86] pmulh.ll - add extra test coverage from #109790
Shows poor codegen on AVX512 targets
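For context: the new test masks each i16 lane to 15 bits, zero-extends to i32, multiplies by -1000, and keeps the high 16 bits of each product. Because sign- and zero-extension agree on the masked value, this is exactly a signed high-half multiply, which SSE and AVX2 already lower to a single pmulhw. Below is a minimal sketch of that equivalent form, written against the x86 pmulh.w intrinsic purely for illustration; it is not part of this commit.

; Sketch only: the pmulhw form that the masked zext/mul/lshr/trunc
; pattern in the test reduces to on SSE/AVX2 targets.
declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>)

define <8 x i16> @pmulhw_form(<8 x i16> %x) {
  ; Masking to 15 bits makes sign extension equal zero extension,
  ; so a signed high-half multiply is safe here.
  %m = and <8 x i16> %x, <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767>
  ; -1000 has the i16 bit pattern 64536, matching the pmulhw
  ; constants in the SSE/AVX2 check lines below.
  %h = tail call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %m, <8 x i16> <i16 -1000, i16 -1000, i16 -1000, i16 -1000, i16 -1000, i16 -1000, i16 -1000, i16 -1000>)
  ret <8 x i16> %h
}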
Added:

Modified:
    llvm/test/CodeGen/X86/pmulh.ll

Removed:
################################################################################
diff --git a/llvm/test/CodeGen/X86/pmulh.ll b/llvm/test/CodeGen/X86/pmulh.ll
index 502249a87c4892..300da68d9a3b34 100644
--- a/llvm/test/CodeGen/X86/pmulh.ll
+++ b/llvm/test/CodeGen/X86/pmulh.ll
@@ -937,6 +937,65 @@ define <16 x i32> @zext_mulhuw_v16i16_lshr(<16 x i16> %a, <16 x i16> %b) {
ret <16 x i32> %d
}
+; PR109790
+define void @PR109790(ptr sret([32 x i8]) %ret, ptr %a) {
+; SSE-LABEL: PR109790:
+; SSE: # %bb.0:
+; SSE-NEXT: movq %rdi, %rax
+; SSE-NEXT: movdqa {{.*#+}} xmm0 = [32767,32767,32767,32767,32767,32767,32767,32767]
+; SSE-NEXT: movdqa (%rsi), %xmm1
+; SSE-NEXT: pand %xmm0, %xmm1
+; SSE-NEXT: pand 16(%rsi), %xmm0
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [64536,64536,64536,64536,64536,64536,64536,64536]
+; SSE-NEXT: pmulhw %xmm2, %xmm0
+; SSE-NEXT: pmulhw %xmm2, %xmm1
+; SSE-NEXT: movdqa %xmm1, (%rdi)
+; SSE-NEXT: movdqa %xmm0, 16(%rdi)
+; SSE-NEXT: retq
+;
+; AVX2-LABEL: PR109790:
+; AVX2: # %bb.0:
+; AVX2-NEXT: movq %rdi, %rax
+; AVX2-NEXT: vmovdqa (%rsi), %ymm0
+; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536]
+; AVX2-NEXT: vmovdqa %ymm0, (%rdi)
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: PR109790:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: movq %rdi, %rax
+; AVX512F-NEXT: vmovdqa (%rsi), %ymm0
+; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
+; AVX512F-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
+; AVX512F-NEXT: vpsrld $16, %zmm0, %zmm0
+; AVX512F-NEXT: vpmovdw %zmm0, (%rdi)
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: PR109790:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: movq %rdi, %rax
+; AVX512BW-NEXT: vmovdqa (%rsi), %ymm0
+; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX512BW-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
+; AVX512BW-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 # [64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0]
+; AVX512BW-NEXT: vpsrld $16, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmovdw %zmm0, (%rdi)
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+ %load = load <16 x i16>, ptr %a, align 32
+ %and = and <16 x i16> %load, <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767>
+ %ext = zext nneg <16 x i16> %and to <16 x i32>
+ %mul = mul nsw <16 x i32> %ext, <i32 -1000, i32 -1000, i32 -1000, i32 -1000, i32 -1000, i32 -1000, i32 -1000, i32 -1000, i32 -1000, i32 -1000, i32 -1000, i32 -1000, i32 -1000, i32 -1000, i32 -1000, i32 -1000>
+ %srl = lshr <16 x i32> %mul, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+ %res = trunc nuw <16 x i32> %srl to <16 x i16>
+ store <16 x i16> %res, ptr %ret, align 32
+ ret void
+}
+
; PR109790
define <16 x i16> @zext_mulhuw_v16i16_negative_constant(<16 x i16> %a) {
; SSE-LABEL: zext_mulhuw_v16i16_negative_constant:
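As the check lines above show, SSE and AVX2 stay in 16-bit lanes (pand/vpand followed by pmulhw/vpmulhw), while both AVX512 configurations widen to 32-bit lanes across a zmm register (vpmovzxwd, then vpmulld or vpmaddwd, vpsrld $16, and vpmovdw back to 16-bit); that is the poor codegen this test now pins down.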