[llvm] 5a6792a - [X86] combine-add.ll - add test case for PR52039 / Issue #51381
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sun May 8 05:45:30 PDT 2022
Author: Simon Pilgrim
Date: 2022-05-08T13:45:23+01:00
New Revision: 5a6792a146a388ef0147f03043eccfd898816504
URL: https://github.com/llvm/llvm-project/commit/5a6792a146a388ef0147f03043eccfd898816504
DIFF: https://github.com/llvm/llvm-project/commit/5a6792a146a388ef0147f03043eccfd898816504.diff
LOG: [X86] combine-add.ll - add test case for PR52039 / Issue #51381
Also split AVX1/AVX2 test coverage
Added:
Modified:
llvm/test/CodeGen/X86/combine-add.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/X86/combine-add.ll b/llvm/test/CodeGen/X86/combine-add.ll
index fb60bc852189..413a7a26bb0b 100644
--- a/llvm/test/CodeGen/X86/combine-add.ll
+++ b/llvm/test/CodeGen/X86/combine-add.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
; fold (add x, 0) -> x
define <4 x i32> @combine_vec_add_to_zero(<4 x i32> %a) {
@@ -223,6 +224,63 @@ define <4 x i32> @combine_vec_add_sub_sub(<4 x i32> %a, <4 x i32> %b, <4 x i32>
ret <4 x i32> %3
}
+; FIXME: missing oneuse limit on fold
+define void @PR52039(<8 x i32>* %pa, <8 x i32>* %pb) {
+; SSE-LABEL: PR52039:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqu (%rdi), %xmm0
+; SSE-NEXT: movdqu 16(%rdi), %xmm1
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [10,10,10,10]
+; SSE-NEXT: movdqa %xmm2, %xmm3
+; SSE-NEXT: psubd %xmm1, %xmm3
+; SSE-NEXT: psubd %xmm0, %xmm2
+; SSE-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3]
+; SSE-NEXT: movdqu %xmm2, (%rsi)
+; SSE-NEXT: pmulld %xmm0, %xmm2
+; SSE-NEXT: pmulld %xmm3, %xmm0
+; SSE-NEXT: movdqu %xmm3, 16(%rsi)
+; SSE-NEXT: movdqu %xmm0, 16(%rdi)
+; SSE-NEXT: movdqu %xmm2, (%rdi)
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: PR52039:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovdqu (%rdi), %xmm0
+; AVX1-NEXT: vmovdqu 16(%rdi), %xmm1
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [10,10,10,10]
+; AVX1-NEXT: vpsubd %xmm0, %xmm2, %xmm3
+; AVX1-NEXT: vpsubd %xmm1, %xmm2, %xmm2
+; AVX1-NEXT: vpaddd %xmm1, %xmm1, %xmm4
+; AVX1-NEXT: vpaddd %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpaddd %xmm0, %xmm0, %xmm4
+; AVX1-NEXT: vpaddd %xmm4, %xmm0, %xmm0
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [30,30,30,30]
+; AVX1-NEXT: vpsubd %xmm0, %xmm4, %xmm0
+; AVX1-NEXT: vpsubd %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vmovdqu %xmm2, 16(%rsi)
+; AVX1-NEXT: vmovdqu %xmm3, (%rsi)
+; AVX1-NEXT: vmovdqu %xmm1, 16(%rdi)
+; AVX1-NEXT: vmovdqu %xmm0, (%rdi)
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: PR52039:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm0 = [10,10,10,10,10,10,10,10]
+; AVX2-NEXT: vpsubd (%rdi), %ymm0, %ymm0
+; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [3,3,3,3,3,3,3,3]
+; AVX2-NEXT: vpmulld %ymm1, %ymm0, %ymm1
+; AVX2-NEXT: vmovdqu %ymm0, (%rsi)
+; AVX2-NEXT: vmovdqu %ymm1, (%rdi)
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+ %load = load <8 x i32>, <8 x i32>* %pa, align 4
+ %sub = sub nsw <8 x i32> <i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10>, %load
+ %mul = mul nsw <8 x i32> %sub, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+ store <8 x i32> %sub, <8 x i32>* %pb, align 4
+ store <8 x i32> %mul, <8 x i32>* %pa, align 4
+ ret void
+}
+
; fold (a+b) -> (a|b) iff a and b share no bits.
define <4 x i32> @combine_vec_add_uniquebits(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: combine_vec_add_uniquebits:
@@ -232,14 +290,21 @@ define <4 x i32> @combine_vec_add_uniquebits(<4 x i32> %a, <4 x i32> %b) {
; SSE-NEXT: orps %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: combine_vec_add_uniquebits:
-; AVX: # %bb.0:
-; AVX-NEXT: vbroadcastss {{.*#+}} xmm2 = [61680,61680,61680,61680]
-; AVX-NEXT: vandps %xmm2, %xmm0, %xmm0
-; AVX-NEXT: vbroadcastss {{.*#+}} xmm2 = [3855,3855,3855,3855]
-; AVX-NEXT: vandps %xmm2, %xmm1, %xmm1
-; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: combine_vec_add_uniquebits:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: combine_vec_add_uniquebits:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vbroadcastss {{.*#+}} xmm2 = [61680,61680,61680,61680]
+; AVX2-NEXT: vandps %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vbroadcastss {{.*#+}} xmm2 = [3855,3855,3855,3855]
+; AVX2-NEXT: vandps %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vorps %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
%1 = and <4 x i32> %a, <i32 61680, i32 61680, i32 61680, i32 61680>
%2 = and <4 x i32> %b, <i32 3855, i32 3855, i32 3855, i32 3855>
%3 = add <4 x i32> %1, %2
More information about the llvm-commits mailing list