[llvm] r265048 - [x86] add memset tests to show another potential improvement
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 31 13:40:33 PDT 2016
Author: spatel
Date: Thu Mar 31 15:40:32 2016
New Revision: 265048
URL: http://llvm.org/viewvc/llvm-project?rev=265048&view=rev
Log:
[x86] add memset tests to show another potential improvement
Modified:
llvm/trunk/test/CodeGen/X86/memset-nonzero.ll
Modified: llvm/trunk/test/CodeGen/X86/memset-nonzero.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/memset-nonzero.ll?rev=265048&r1=265047&r2=265048&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/memset-nonzero.ll (original)
+++ llvm/trunk/test/CodeGen/X86/memset-nonzero.ll Thu Mar 31 15:40:32 2016
@@ -3,6 +3,8 @@
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=avx | FileCheck %s --check-prefix=ANY --check-prefix=AVX --check-prefix=AVX1
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=avx2 | FileCheck %s --check-prefix=ANY --check-prefix=AVX --check-prefix=AVX2
+; https://llvm.org/bugs/show_bug.cgi?id=27100
+
define void @memset_16_nonzero_bytes(i8* %x) {
; SSE2-LABEL: memset_16_nonzero_bytes:
; SSE2: movabsq $3038287259199220266, %rax # imm = 0x2A2A2A2A2A2A2A2A
@@ -166,3 +168,204 @@ define void @memset_256_nonzero_bytes(i8
declare i8* @__memset_chk(i8*, i32, i64, i64)
+; Repeat the tests above with a non-constant byte value for the stores.
+
+define void @memset_16_nonconst_bytes(i8* %x, i8 %c) {
+; SSE2-LABEL: memset_16_nonconst_bytes:
+; SSE2: movzbl %sil, %eax
+; SSE2-NEXT: movabsq $72340172838076673, %rcx # imm = 0x101010101010101
+; SSE2-NEXT: imulq %rax, %rcx
+; SSE2-NEXT: movq %rcx, 8(%rdi)
+; SSE2-NEXT: movq %rcx, (%rdi)
+; SSE2-NEXT: retq
+;
+; AVX1-LABEL: memset_16_nonconst_bytes:
+; AVX1: movzbl %sil, %eax
+; AVX1-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101
+; AVX1-NEXT: vmovd %eax, %xmm0
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; AVX1-NEXT: vmovdqu %xmm0, (%rdi)
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: memset_16_nonconst_bytes:
+; AVX2: movzbl %sil, %eax
+; AVX2-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101
+; AVX2-NEXT: vmovd %eax, %xmm0
+; AVX2-NEXT: vbroadcastss %xmm0, %xmm0
+; AVX2-NEXT: vmovups %xmm0, (%rdi)
+; AVX2-NEXT: retq
+;
+ tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 16, i32 1, i1 false)
+ ret void
+}
+
+define void @memset_32_nonconst_bytes(i8* %x, i8 %c) {
+; SSE2-LABEL: memset_32_nonconst_bytes:
+; SSE2: movzbl %sil, %eax
+; SSE2-NEXT: movabsq $72340172838076673, %rcx # imm = 0x101010101010101
+; SSE2-NEXT: imulq %rax, %rcx
+; SSE2-NEXT: movq %rcx, 24(%rdi)
+; SSE2-NEXT: movq %rcx, 16(%rdi)
+; SSE2-NEXT: movq %rcx, 8(%rdi)
+; SSE2-NEXT: movq %rcx, (%rdi)
+; SSE2-NEXT: retq
+;
+; AVX1-LABEL: memset_32_nonconst_bytes:
+; AVX1: movzbl %sil, %eax
+; AVX1-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101
+; AVX1-NEXT: vmovd %eax, %xmm0
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT: vmovups %ymm0, (%rdi)
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: memset_32_nonconst_bytes:
+; AVX2: movzbl %sil, %eax
+; AVX2-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101
+; AVX2-NEXT: vmovd %eax, %xmm0
+; AVX2-NEXT: vbroadcastss %xmm0, %ymm0
+; AVX2-NEXT: vmovups %ymm0, (%rdi)
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+ tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 32, i32 1, i1 false)
+ ret void
+}
+
+define void @memset_64_nonconst_bytes(i8* %x, i8 %c) {
+; SSE2-LABEL: memset_64_nonconst_bytes:
+; SSE2: movzbl %sil, %eax
+; SSE2-NEXT: movabsq $72340172838076673, %rcx # imm = 0x101010101010101
+; SSE2-NEXT: imulq %rax, %rcx
+; SSE2-NEXT: movq %rcx, 56(%rdi)
+; SSE2-NEXT: movq %rcx, 48(%rdi)
+; SSE2-NEXT: movq %rcx, 40(%rdi)
+; SSE2-NEXT: movq %rcx, 32(%rdi)
+; SSE2-NEXT: movq %rcx, 24(%rdi)
+; SSE2-NEXT: movq %rcx, 16(%rdi)
+; SSE2-NEXT: movq %rcx, 8(%rdi)
+; SSE2-NEXT: movq %rcx, (%rdi)
+; SSE2-NEXT: retq
+;
+; AVX1-LABEL: memset_64_nonconst_bytes:
+; AVX1: movzbl %sil, %eax
+; AVX1-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101
+; AVX1-NEXT: vmovd %eax, %xmm0
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT: vmovups %ymm0, 32(%rdi)
+; AVX1-NEXT: vmovups %ymm0, (%rdi)
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: memset_64_nonconst_bytes:
+; AVX2: movzbl %sil, %eax
+; AVX2-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101
+; AVX2-NEXT: vmovd %eax, %xmm0
+; AVX2-NEXT: vbroadcastss %xmm0, %ymm0
+; AVX2-NEXT: vmovups %ymm0, 32(%rdi)
+; AVX2-NEXT: vmovups %ymm0, (%rdi)
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+ tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 64, i32 1, i1 false)
+ ret void
+}
+
+define void @memset_128_nonconst_bytes(i8* %x, i8 %c) {
+; SSE2-LABEL: memset_128_nonconst_bytes:
+; SSE2: movzbl %sil, %eax
+; SSE2-NEXT: movabsq $72340172838076673, %rcx # imm = 0x101010101010101
+; SSE2-NEXT: imulq %rax, %rcx
+; SSE2-NEXT: movq %rcx, 120(%rdi)
+; SSE2-NEXT: movq %rcx, 112(%rdi)
+; SSE2-NEXT: movq %rcx, 104(%rdi)
+; SSE2-NEXT: movq %rcx, 96(%rdi)
+; SSE2-NEXT: movq %rcx, 88(%rdi)
+; SSE2-NEXT: movq %rcx, 80(%rdi)
+; SSE2-NEXT: movq %rcx, 72(%rdi)
+; SSE2-NEXT: movq %rcx, 64(%rdi)
+; SSE2-NEXT: movq %rcx, 56(%rdi)
+; SSE2-NEXT: movq %rcx, 48(%rdi)
+; SSE2-NEXT: movq %rcx, 40(%rdi)
+; SSE2-NEXT: movq %rcx, 32(%rdi)
+; SSE2-NEXT: movq %rcx, 24(%rdi)
+; SSE2-NEXT: movq %rcx, 16(%rdi)
+; SSE2-NEXT: movq %rcx, 8(%rdi)
+; SSE2-NEXT: movq %rcx, (%rdi)
+; SSE2-NEXT: retq
+;
+; AVX1-LABEL: memset_128_nonconst_bytes:
+; AVX1: movzbl %sil, %eax
+; AVX1-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101
+; AVX1-NEXT: vmovd %eax, %xmm0
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT: vmovups %ymm0, 96(%rdi)
+; AVX1-NEXT: vmovups %ymm0, 64(%rdi)
+; AVX1-NEXT: vmovups %ymm0, 32(%rdi)
+; AVX1-NEXT: vmovups %ymm0, (%rdi)
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: memset_128_nonconst_bytes:
+; AVX2: movzbl %sil, %eax
+; AVX2-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101
+; AVX2-NEXT: vmovd %eax, %xmm0
+; AVX2-NEXT: vbroadcastss %xmm0, %ymm0
+; AVX2-NEXT: vmovups %ymm0, 96(%rdi)
+; AVX2-NEXT: vmovups %ymm0, 64(%rdi)
+; AVX2-NEXT: vmovups %ymm0, 32(%rdi)
+; AVX2-NEXT: vmovups %ymm0, (%rdi)
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+ tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 128, i32 1, i1 false)
+ ret void
+}
+
+define void @memset_256_nonconst_bytes(i8* %x, i8 %c) {
+; SSE2-LABEL: memset_256_nonconst_bytes:
+; SSE2: movl $256, %edx # imm = 0x100
+; SSE2-NEXT: jmp memset # TAILCALL
+;
+; AVX1-LABEL: memset_256_nonconst_bytes:
+; AVX1: movzbl %sil, %eax
+; AVX1-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101
+; AVX1-NEXT: vmovd %eax, %xmm0
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT: vmovups %ymm0, 224(%rdi)
+; AVX1-NEXT: vmovups %ymm0, 192(%rdi)
+; AVX1-NEXT: vmovups %ymm0, 160(%rdi)
+; AVX1-NEXT: vmovups %ymm0, 128(%rdi)
+; AVX1-NEXT: vmovups %ymm0, 96(%rdi)
+; AVX1-NEXT: vmovups %ymm0, 64(%rdi)
+; AVX1-NEXT: vmovups %ymm0, 32(%rdi)
+; AVX1-NEXT: vmovups %ymm0, (%rdi)
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: memset_256_nonconst_bytes:
+; AVX2: movzbl %sil, %eax
+; AVX2-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101
+; AVX2-NEXT: vmovd %eax, %xmm0
+; AVX2-NEXT: vbroadcastss %xmm0, %ymm0
+; AVX2-NEXT: vmovups %ymm0, 224(%rdi)
+; AVX2-NEXT: vmovups %ymm0, 192(%rdi)
+; AVX2-NEXT: vmovups %ymm0, 160(%rdi)
+; AVX2-NEXT: vmovups %ymm0, 128(%rdi)
+; AVX2-NEXT: vmovups %ymm0, 96(%rdi)
+; AVX2-NEXT: vmovups %ymm0, 64(%rdi)
+; AVX2-NEXT: vmovups %ymm0, 32(%rdi)
+; AVX2-NEXT: vmovups %ymm0, (%rdi)
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+ tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 256, i32 1, i1 false)
+ ret void
+}
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #1
+
More information about the llvm-commits mailing list