[llvm] 66ba85b - [X86] Precommit test case for D148980.
via llvm-commits
llvm-commits at lists.llvm.org
Sat Apr 22 00:50:02 PDT 2023
Author: Luo, Yuanke
Date: 2023-04-22T15:40:38+08:00
New Revision: 66ba85b9c5daf64893216d476c8b85c80582a8b7
URL: https://github.com/llvm/llvm-project/commit/66ba85b9c5daf64893216d476c8b85c80582a8b7
DIFF: https://github.com/llvm/llvm-project/commit/66ba85b9c5daf64893216d476c8b85c80582a8b7.diff
LOG: [X86] Precommit test case for D148980.
Added:
llvm/test/CodeGen/X86/avxvnni-combine.ll
Modified:
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/X86/avxvnni-combine.ll b/llvm/test/CodeGen/X86/avxvnni-combine.ll
new file mode 100644
index 0000000000000..c736d9c757273
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avxvnni-combine.ll
@@ -0,0 +1,130 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=alderlake | FileCheck %s
+
+; __m256i foo(int cnt, __m256i c, __m256i b, __m256i *p) {
+;   for (int i = 0; i < cnt; ++i) {
+;     __m256i a = p[i];
+;     __m256i m = _mm256_madd_epi16 (b, a);
+;     c = _mm256_add_epi32(m, c);
+;   }
+;   return c;
+; }
+
+define dso_local <4 x i64> @foo(i32 %0, <4 x i64> %1, <4 x i64> %2, ptr %3) { ; %0 = cnt, %1 = c, %2 = b, %3 = p in the C sketch
+; CHECK-LABEL: foo:
+; CHECK: # %bb.0:
+; CHECK-NEXT: testl %edi, %edi
+; CHECK-NEXT: jle .LBB0_6
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: movl %edi, %edx
+; CHECK-NEXT: movl %edx, %eax
+; CHECK-NEXT: andl $3, %eax
+; CHECK-NEXT: cmpl $4, %edi
+; CHECK-NEXT: jae .LBB0_7
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: jmp .LBB0_3
+; CHECK-NEXT: .LBB0_7:
+; CHECK-NEXT: andl $-4, %edx
+; CHECK-NEXT: leaq 96(%rsi), %rdi
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: .p2align 4, 0x90
+; CHECK-NEXT: .LBB0_8: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: {vex} vpdpwssd -96(%rdi), %ymm1, %ymm0
+; CHECK-NEXT: {vex} vpdpwssd -64(%rdi), %ymm1, %ymm0
+; CHECK-NEXT: {vex} vpdpwssd -32(%rdi), %ymm1, %ymm0
+; CHECK-NEXT: {vex} vpdpwssd (%rdi), %ymm1, %ymm0
+; CHECK-NEXT: addq $4, %rcx
+; CHECK-NEXT: subq $-128, %rdi
+; CHECK-NEXT: cmpq %rcx, %rdx
+; CHECK-NEXT: jne .LBB0_8
+; CHECK-NEXT: .LBB0_3:
+; CHECK-NEXT: testq %rax, %rax
+; CHECK-NEXT: je .LBB0_6
+; CHECK-NEXT: # %bb.4: # %.preheader
+; CHECK-NEXT: shlq $5, %rcx
+; CHECK-NEXT: addq %rcx, %rsi
+; CHECK-NEXT: shlq $5, %rax
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: .p2align 4, 0x90
+; CHECK-NEXT: .LBB0_5: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: {vex} vpdpwssd (%rsi,%rcx), %ymm1, %ymm0
+; CHECK-NEXT: addq $32, %rcx
+; CHECK-NEXT: cmpq %rcx, %rax
+; CHECK-NEXT: jne .LBB0_5
+; CHECK-NEXT: .LBB0_6:
+; CHECK-NEXT: retq
+ %5 = icmp sgt i32 %0, 0 ; cnt > 0 ?
+ br i1 %5, label %6, label %33 ; cnt <= 0: go straight to the return block with c unchanged
+
+6: ; preds = %4
+ %7 = bitcast <4 x i64> %2 to <16 x i16> ; b reinterpreted as 16 x i16, the pmadd.wd operand type
+ %8 = bitcast <4 x i64> %1 to <8 x i32> ; c reinterpreted as 8 x i32, the accumulator type
+ %9 = zext i32 %0 to i64
+ %10 = and i64 %9, 3 ; cnt % 4: trip count of the remainder loop (%19)
+ %11 = icmp ult i32 %0, 4 ; fewer than 4 iterations: bypass the 4x unrolled loop (%35)
+ br i1 %11, label %14, label %12
+
+12: ; preds = %6
+ %13 = and i64 %9, 4294967292 ; cnt & 0xFFFFFFFC: trip count covered by the unrolled loop
+ br label %35
+
+14: ; preds = %35, %6
+ %15 = phi <8 x i32> [ undef, %6 ], [ %57, %35 ] ; result if the remainder loop is skipped; undef arm needs 0 < cnt < 4 with cnt % 4 == 0, which cannot occur
+ %16 = phi i64 [ 0, %6 ], [ %58, %35 ] ; first element index not yet processed
+ %17 = phi <8 x i32> [ %8, %6 ], [ %57, %35 ] ; accumulator carried into the remainder loop
+ %18 = icmp eq i64 %10, 0 ; no remainder iterations?
+ br i1 %18, label %30, label %19
+
+19: ; preds = %14, %19
+ %20 = phi i64 [ %27, %19 ], [ %16, %14 ] ; element index i
+ %21 = phi <8 x i32> [ %26, %19 ], [ %17, %14 ] ; running accumulator
+ %22 = phi i64 [ %28, %19 ], [ 0, %14 ] ; remainder iterations completed
+ %23 = getelementptr inbounds <4 x i64>, ptr %3, i64 %20
+ %24 = load <16 x i16>, ptr %23, align 32 ; a = p[i]
+ %25 = tail call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %7, <16 x i16> %24) ; m = madd(b, a)
+ %26 = add <8 x i32> %25, %21 ; c = m + c
+ %27 = add nuw nsw i64 %20, 1
+ %28 = add i64 %22, 1
+ %29 = icmp eq i64 %28, %10 ; stop after cnt % 4 iterations
+ br i1 %29, label %30, label %19
+
+30: ; preds = %19, %14
+ %31 = phi <8 x i32> [ %15, %14 ], [ %26, %19 ] ; final accumulator
+ %32 = bitcast <8 x i32> %31 to <4 x i64>
+ br label %33
+
+33: ; preds = %30, %4
+ %34 = phi <4 x i64> [ %32, %30 ], [ %1, %4 ] ; accumulated value, or the incoming c when cnt <= 0
+ ret <4 x i64> %34
+
+35: ; preds = %35, %12
+ %36 = phi i64 [ 0, %12 ], [ %58, %35 ] ; base element index i; starts at 0 and advances by 4
+ %37 = phi <8 x i32> [ %8, %12 ], [ %57, %35 ] ; running accumulator
+ %38 = phi i64 [ 0, %12 ], [ %59, %35 ] ; iterations completed by the unrolled loop
+ %39 = getelementptr inbounds <4 x i64>, ptr %3, i64 %36
+ %40 = load <16 x i16>, ptr %39, align 32 ; p[i]
+ %41 = tail call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %7, <16 x i16> %40)
+ %42 = add <8 x i32> %41, %37
+ %43 = or i64 %36, 1 ; i + 1 (i is a multiple of 4, so or behaves as add)
+ %44 = getelementptr inbounds <4 x i64>, ptr %3, i64 %43
+ %45 = load <16 x i16>, ptr %44, align 32 ; p[i + 1]
+ %46 = tail call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %7, <16 x i16> %45)
+ %47 = add <8 x i32> %46, %42
+ %48 = or i64 %36, 2 ; i + 2
+ %49 = getelementptr inbounds <4 x i64>, ptr %3, i64 %48
+ %50 = load <16 x i16>, ptr %49, align 32 ; p[i + 2]
+ %51 = tail call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %7, <16 x i16> %50)
+ %52 = add <8 x i32> %51, %47
+ %53 = or i64 %36, 3 ; i + 3
+ %54 = getelementptr inbounds <4 x i64>, ptr %3, i64 %53
+ %55 = load <16 x i16>, ptr %54, align 32 ; p[i + 3]
+ %56 = tail call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %7, <16 x i16> %55)
+ %57 = add <8 x i32> %56, %52 ; accumulator after four madd+add steps
+ %58 = add nuw nsw i64 %36, 4
+ %59 = add i64 %38, 4
+ %60 = icmp eq i64 %59, %13 ; unrolled loop ends once %13 (cnt & ~3) iterations are done
+ br i1 %60, label %14, label %35
+}
+
+declare <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16>, <16 x i16>) ; intrinsic used in @foo where the C sketch calls _mm256_madd_epi16
More information about the llvm-commits mailing list