[llvm] 05671ce - [X86] Add tests showing failure to push freeze through SSE PMADD nodes
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 22 06:31:39 PDT 2024
Author: Simon Pilgrim
Date: 2024-07-22T14:31:11+01:00
New Revision: 05671cec6f736c65b7df29234db828116e6d95dd
URL: https://github.com/llvm/llvm-project/commit/05671cec6f736c65b7df29234db828116e6d95dd
DIFF: https://github.com/llvm/llvm-project/commit/05671cec6f736c65b7df29234db828116e6d95dd.diff
LOG: [X86] Add tests showing failure to push freeze through SSE PMADD nodes
PMADD guarantee inbounds/saturated ext-multiply-add results
Test to help with regression identified on #84924
Added:
Modified:
llvm/test/CodeGen/X86/combine-pmadd.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/X86/combine-pmadd.ll b/llvm/test/CodeGen/X86/combine-pmadd.ll
index 565d9ef6eb3e6..8ba5a25678122 100644
--- a/llvm/test/CodeGen/X86/combine-pmadd.ll
+++ b/llvm/test/CodeGen/X86/combine-pmadd.ll
@@ -63,6 +63,37 @@ define <8 x i32> @combine_pmaddwd_concat(<8 x i16> %a0, <8 x i16> %a1, <8 x i16>
ret <8 x i32> %3
}
+define <8 x i32> @combine_pmaddwd_concat_freeze(<8 x i16> %a0, <8 x i16> %a1) {
+; SSE-LABEL: combine_pmaddwd_concat_freeze:
+; SSE: # %bb.0:
+; SSE-NEXT: pmovsxbw {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1]
+; SSE-NEXT: pmaddwd %xmm2, %xmm0
+; SSE-NEXT: pmaddwd %xmm2, %xmm1
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: combine_pmaddwd_concat_freeze:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1]
+; AVX1-NEXT: vpmaddwd %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpmaddwd %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: combine_pmaddwd_concat_freeze:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastw {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1]
+; AVX2-NEXT: vpmaddwd %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpmaddwd %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %lo = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+ %hi = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+ %flo = freeze <4 x i32> %lo
+ %fhi = freeze <4 x i32> %hi
+ %res = shufflevector <4 x i32> %flo, <4 x i32> %fhi, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i32> %res
+}
+
define <4 x i32> @combine_pmaddwd_demandedelts(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: combine_pmaddwd_demandedelts:
; SSE: # %bb.0:
@@ -178,6 +209,37 @@ define <16 x i16> @combine_pmaddubsw_concat(<16 x i8> %a0, <16 x i8> %a1, <16 x
ret <16 x i16> %3
}
+define <16 x i16> @combine_pmaddubsw_concat_freeze(<16 x i8> %a0, <16 x i8> %a1) {
+; SSE-LABEL: combine_pmaddubsw_concat_freeze:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; SSE-NEXT: pmaddubsw %xmm2, %xmm0
+; SSE-NEXT: pmaddubsw %xmm2, %xmm1
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: combine_pmaddubsw_concat_freeze:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; AVX1-NEXT: vpmaddubsw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpmaddubsw %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: combine_pmaddubsw_concat_freeze:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; AVX2-NEXT: vpmaddubsw %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpmaddubsw %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %lo = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a0, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+ %hi = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+ %flo = freeze <8 x i16> %lo
+ %fhi = freeze <8 x i16> %hi
+ %res = shufflevector <8 x i16> %flo, <8 x i16> %fhi, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ ret <16 x i16> %res
+}
+
define <8 x i16> @combine_pmaddubsw_demandedelts(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: combine_pmaddubsw_demandedelts:
; SSE: # %bb.0:
More information about the llvm-commits
mailing list