[llvm] r265048 - [x86] add memset tests to show another potential improvement

Sanjay Patel via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 31 13:40:33 PDT 2016


Author: spatel
Date: Thu Mar 31 15:40:32 2016
New Revision: 265048

URL: http://llvm.org/viewvc/llvm-project?rev=265048&view=rev
Log:
[x86] add memset tests to show another potential improvement
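
(Background, not part of the committed log message: the new tests splat a
variable byte value across a 64-bit word by multiplying it with
0x0101010101010101; this shows up in the SSE2 output below as
movabsq + imulq. A minimal LLVM IR sketch of that idiom, with a
hypothetical function name:

  define i64 @splat8(i8 %c) {
    %z = zext i8 %c to i64
    %s = mul i64 %z, 72340172838076673  ; 0x0101010101010101
    ret i64 %s
  }

For a constant memset the multiply folds away at compile time, e.g.
0x2A * 0x0101010101010101 = 0x2A2A2A2A2A2A2A2A, which is exactly the
immediate in the existing memset_16_nonzero_bytes test. The potential
improvement tracked by the PR referenced in the diff would presumably be
to form the splat in a vector register, as the AVX runs already do,
rather than issuing a series of scalar 8-byte stores.)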

Modified:
    llvm/trunk/test/CodeGen/X86/memset-nonzero.ll

Modified: llvm/trunk/test/CodeGen/X86/memset-nonzero.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/memset-nonzero.ll?rev=265048&r1=265047&r2=265048&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/memset-nonzero.ll (original)
+++ llvm/trunk/test/CodeGen/X86/memset-nonzero.ll Thu Mar 31 15:40:32 2016
@@ -3,6 +3,8 @@
 ; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=avx | FileCheck %s --check-prefix=ANY --check-prefix=AVX --check-prefix=AVX1
 ; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=avx2 | FileCheck %s --check-prefix=ANY --check-prefix=AVX --check-prefix=AVX2
 
+; https://llvm.org/bugs/show_bug.cgi?id=27100
+
 define void @memset_16_nonzero_bytes(i8* %x) {
 ; SSE2-LABEL: memset_16_nonzero_bytes:
 ; SSE2:         movabsq $3038287259199220266, %rax # imm = 0x2A2A2A2A2A2A2A2A
@@ -166,3 +168,204 @@ define void @memset_256_nonzero_bytes(i8
 
 declare i8* @__memset_chk(i8*, i32, i64, i64)
 
+; Repeat with a non-constant value for the stores.
+
+define void @memset_16_nonconst_bytes(i8* %x, i8 %c) {
+; SSE2-LABEL: memset_16_nonconst_bytes:
+; SSE2:         movzbl %sil, %eax
+; SSE2-NEXT:    movabsq $72340172838076673, %rcx # imm = 0x101010101010101
+; SSE2-NEXT:    imulq %rax, %rcx
+; SSE2-NEXT:    movq %rcx, 8(%rdi)
+; SSE2-NEXT:    movq %rcx, (%rdi)
+; SSE2-NEXT:    retq
+;
+; AVX1-LABEL: memset_16_nonconst_bytes:
+; AVX1:         movzbl %sil, %eax
+; AVX1-NEXT:    imull $16843009, %eax, %eax # imm = 0x1010101
+; AVX1-NEXT:    vmovd %eax, %xmm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; AVX1-NEXT:    vmovdqu %xmm0, (%rdi)
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: memset_16_nonconst_bytes:
+; AVX2:         movzbl %sil, %eax
+; AVX2-NEXT:    imull $16843009, %eax, %eax # imm = 0x1010101
+; AVX2-NEXT:    vmovd %eax, %xmm0
+; AVX2-NEXT:    vbroadcastss %xmm0, %xmm0
+; AVX2-NEXT:    vmovups %xmm0, (%rdi)
+; AVX2-NEXT:    retq
+;
+  tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 16, i32 1, i1 false)
+  ret void
+}
+
+define void @memset_32_nonconst_bytes(i8* %x, i8 %c) {
+; SSE2-LABEL: memset_32_nonconst_bytes:
+; SSE2:         movzbl %sil, %eax
+; SSE2-NEXT:    movabsq $72340172838076673, %rcx # imm = 0x101010101010101
+; SSE2-NEXT:    imulq %rax, %rcx
+; SSE2-NEXT:    movq %rcx, 24(%rdi)
+; SSE2-NEXT:    movq %rcx, 16(%rdi)
+; SSE2-NEXT:    movq %rcx, 8(%rdi)
+; SSE2-NEXT:    movq %rcx, (%rdi)
+; SSE2-NEXT:    retq
+;
+; AVX1-LABEL: memset_32_nonconst_bytes:
+; AVX1:         movzbl %sil, %eax
+; AVX1-NEXT:    imull $16843009, %eax, %eax # imm = 0x1010101
+; AVX1-NEXT:    vmovd %eax, %xmm0
+; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT:    vmovups %ymm0, (%rdi)
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: memset_32_nonconst_bytes:
+; AVX2:         movzbl %sil, %eax
+; AVX2-NEXT:    imull $16843009, %eax, %eax # imm = 0x1010101
+; AVX2-NEXT:    vmovd %eax, %xmm0
+; AVX2-NEXT:    vbroadcastss %xmm0, %ymm0
+; AVX2-NEXT:    vmovups %ymm0, (%rdi)
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+  tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 32, i32 1, i1 false)
+  ret void
+}
+
+define void @memset_64_nonconst_bytes(i8* %x, i8 %c) {
+; SSE2-LABEL: memset_64_nonconst_bytes:
+; SSE2:         movzbl %sil, %eax
+; SSE2-NEXT:    movabsq $72340172838076673, %rcx # imm = 0x101010101010101
+; SSE2-NEXT:    imulq %rax, %rcx
+; SSE2-NEXT:    movq %rcx, 56(%rdi)
+; SSE2-NEXT:    movq %rcx, 48(%rdi)
+; SSE2-NEXT:    movq %rcx, 40(%rdi)
+; SSE2-NEXT:    movq %rcx, 32(%rdi)
+; SSE2-NEXT:    movq %rcx, 24(%rdi)
+; SSE2-NEXT:    movq %rcx, 16(%rdi)
+; SSE2-NEXT:    movq %rcx, 8(%rdi)
+; SSE2-NEXT:    movq %rcx, (%rdi)
+; SSE2-NEXT:    retq
+;
+; AVX1-LABEL: memset_64_nonconst_bytes:
+; AVX1:         movzbl %sil, %eax
+; AVX1-NEXT:    imull $16843009, %eax, %eax # imm = 0x1010101
+; AVX1-NEXT:    vmovd %eax, %xmm0
+; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT:    vmovups %ymm0, 32(%rdi)
+; AVX1-NEXT:    vmovups %ymm0, (%rdi)
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: memset_64_nonconst_bytes:
+; AVX2:         movzbl %sil, %eax
+; AVX2-NEXT:    imull $16843009, %eax, %eax # imm = 0x1010101
+; AVX2-NEXT:    vmovd %eax, %xmm0
+; AVX2-NEXT:    vbroadcastss %xmm0, %ymm0
+; AVX2-NEXT:    vmovups %ymm0, 32(%rdi)
+; AVX2-NEXT:    vmovups %ymm0, (%rdi)
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+  tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 64, i32 1, i1 false)
+  ret void
+}
+
+define void @memset_128_nonconst_bytes(i8* %x, i8 %c) {
+; SSE2-LABEL: memset_128_nonconst_bytes:
+; SSE2:         movzbl %sil, %eax
+; SSE2-NEXT:    movabsq $72340172838076673, %rcx # imm = 0x101010101010101
+; SSE2-NEXT:    imulq %rax, %rcx
+; SSE2-NEXT:    movq %rcx, 120(%rdi)
+; SSE2-NEXT:    movq %rcx, 112(%rdi)
+; SSE2-NEXT:    movq %rcx, 104(%rdi)
+; SSE2-NEXT:    movq %rcx, 96(%rdi)
+; SSE2-NEXT:    movq %rcx, 88(%rdi)
+; SSE2-NEXT:    movq %rcx, 80(%rdi)
+; SSE2-NEXT:    movq %rcx, 72(%rdi)
+; SSE2-NEXT:    movq %rcx, 64(%rdi)
+; SSE2-NEXT:    movq %rcx, 56(%rdi)
+; SSE2-NEXT:    movq %rcx, 48(%rdi)
+; SSE2-NEXT:    movq %rcx, 40(%rdi)
+; SSE2-NEXT:    movq %rcx, 32(%rdi)
+; SSE2-NEXT:    movq %rcx, 24(%rdi)
+; SSE2-NEXT:    movq %rcx, 16(%rdi)
+; SSE2-NEXT:    movq %rcx, 8(%rdi)
+; SSE2-NEXT:    movq %rcx, (%rdi)
+; SSE2-NEXT:    retq
+;
+; AVX1-LABEL: memset_128_nonconst_bytes:
+; AVX1:         movzbl %sil, %eax
+; AVX1-NEXT:    imull $16843009, %eax, %eax # imm = 0x1010101
+; AVX1-NEXT:    vmovd %eax, %xmm0
+; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT:    vmovups %ymm0, 96(%rdi)
+; AVX1-NEXT:    vmovups %ymm0, 64(%rdi)
+; AVX1-NEXT:    vmovups %ymm0, 32(%rdi)
+; AVX1-NEXT:    vmovups %ymm0, (%rdi)
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: memset_128_nonconst_bytes:
+; AVX2:         movzbl %sil, %eax
+; AVX2-NEXT:    imull $16843009, %eax, %eax # imm = 0x1010101
+; AVX2-NEXT:    vmovd %eax, %xmm0
+; AVX2-NEXT:    vbroadcastss %xmm0, %ymm0
+; AVX2-NEXT:    vmovups %ymm0, 96(%rdi)
+; AVX2-NEXT:    vmovups %ymm0, 64(%rdi)
+; AVX2-NEXT:    vmovups %ymm0, 32(%rdi)
+; AVX2-NEXT:    vmovups %ymm0, (%rdi)
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+  tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 128, i32 1, i1 false)
+  ret void
+}
+
+define void @memset_256_nonconst_bytes(i8* %x, i8 %c) {
+; SSE2-LABEL: memset_256_nonconst_bytes:
+; SSE2:         movl $256, %edx # imm = 0x100
+; SSE2-NEXT:    jmp memset # TAILCALL
+;
+; AVX1-LABEL: memset_256_nonconst_bytes:
+; AVX1:         movzbl %sil, %eax
+; AVX1-NEXT:    imull $16843009, %eax, %eax # imm = 0x1010101
+; AVX1-NEXT:    vmovd %eax, %xmm0
+; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT:    vmovups %ymm0, 224(%rdi)
+; AVX1-NEXT:    vmovups %ymm0, 192(%rdi)
+; AVX1-NEXT:    vmovups %ymm0, 160(%rdi)
+; AVX1-NEXT:    vmovups %ymm0, 128(%rdi)
+; AVX1-NEXT:    vmovups %ymm0, 96(%rdi)
+; AVX1-NEXT:    vmovups %ymm0, 64(%rdi)
+; AVX1-NEXT:    vmovups %ymm0, 32(%rdi)
+; AVX1-NEXT:    vmovups %ymm0, (%rdi)
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: memset_256_nonconst_bytes:
+; AVX2:         movzbl %sil, %eax
+; AVX2-NEXT:    imull $16843009, %eax, %eax # imm = 0x1010101
+; AVX2-NEXT:    vmovd %eax, %xmm0
+; AVX2-NEXT:    vbroadcastss %xmm0, %ymm0
+; AVX2-NEXT:    vmovups %ymm0, 224(%rdi)
+; AVX2-NEXT:    vmovups %ymm0, 192(%rdi)
+; AVX2-NEXT:    vmovups %ymm0, 160(%rdi)
+; AVX2-NEXT:    vmovups %ymm0, 128(%rdi)
+; AVX2-NEXT:    vmovups %ymm0, 96(%rdi)
+; AVX2-NEXT:    vmovups %ymm0, 64(%rdi)
+; AVX2-NEXT:    vmovups %ymm0, 32(%rdi)
+; AVX2-NEXT:    vmovups %ymm0, (%rdi)
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+  tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 256, i32 1, i1 false)
+  ret void
+}
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #1
+