[llvm] r349016 - [CodeGen] Allow memcpy/memset to generate small overlapping stores.
Clement Courbet via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 13 01:56:19 PST 2018
Author: courbet
Date: Thu Dec 13 01:56:19 2018
New Revision: 349016
URL: http://llvm.org/viewvc/llvm-project?rev=349016&view=rev
Log:
[CodeGen] Allow memcpy/memset to generate small overlapping stores.
Summary:
All targets either just return false from `allowsMisalignedMemoryAccesses`
or properly model `Fast`, so I don't think there is any reason to prevent
CodeGen from doing the right thing when lowering small memcpy/memset.
Subscribers: nemanjai, javed.absar, eraman, jsji, llvm-commits
Differential Revision: https://reviews.llvm.org/D55365
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
llvm/trunk/test/CodeGen/AArch64/arm64-memcpy-inline.ll
llvm/trunk/test/CodeGen/ARM/memcpy-inline.ll
llvm/trunk/test/CodeGen/ARM/memcpy-ldm-stm.ll
llvm/trunk/test/CodeGen/PowerPC/jaggedstructs.ll
llvm/trunk/test/CodeGen/PowerPC/structsinmem.ll
llvm/trunk/test/CodeGen/PowerPC/structsinregs.ll
llvm/trunk/test/CodeGen/X86/memcpy-from-string.ll
llvm/trunk/test/CodeGen/X86/memset-2.ll
llvm/trunk/test/CodeGen/X86/memset-zero.ll
llvm/trunk/test/CodeGen/X86/unaligned-load.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp?rev=349016&r1=349015&r2=349016&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Thu Dec 13 01:56:19 2018
@@ -5491,12 +5491,10 @@ static bool FindOptimalMemOpLowering(std
// If the new VT cannot cover all of the remaining bits, then consider
// issuing a (or a pair of) unaligned and overlapping load / store.
- // FIXME: Only does this for 64-bit or more since we don't have proper
- // cost model for unaligned load / store.
bool Fast;
- if (NumMemOps && AllowOverlap &&
- VTSize >= 8 && NewVTSize < Size &&
- TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign, &Fast) && Fast)
+ if (NumMemOps && AllowOverlap && NewVTSize < Size &&
+ TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign, &Fast) &&
+ Fast)
VTSize = Size;
else {
VT = NewVT;
Modified: llvm/trunk/test/CodeGen/AArch64/arm64-memcpy-inline.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-memcpy-inline.ll?rev=349016&r1=349015&r2=349016&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-memcpy-inline.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-memcpy-inline.ll Thu Dec 13 01:56:19 2018
@@ -16,10 +16,8 @@
define i32 @t0() {
entry:
; CHECK-LABEL: t0:
-; CHECK: ldrb [[REG0:w[0-9]+]], [x[[BASEREG:[0-9]+]], #10]
-; CHECK: strb [[REG0]], [x[[BASEREG2:[0-9]+]], #10]
-; CHECK: ldrh [[REG1:w[0-9]+]], [x[[BASEREG]], #8]
-; CHECK: strh [[REG1]], [x[[BASEREG2]], #8]
+; CHECK: ldur [[REG0:w[0-9]+]], [x[[BASEREG:[0-9]+]], #7]
+; CHECK: stur [[REG0]], [x[[BASEREG2:[0-9]+]], #7]
; CHECK: ldr [[REG2:x[0-9]+]],
; CHECK: str [[REG2]],
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 getelementptr inbounds (%struct.x, %struct.x* @dst, i32 0, i32 0), i8* align 8 getelementptr inbounds (%struct.x, %struct.x* @src, i32 0, i32 0), i32 11, i1 false)
@@ -74,9 +72,9 @@ entry:
define void @t5(i8* nocapture %C) nounwind {
entry:
; CHECK-LABEL: t5:
-; CHECK: strb wzr, [x0, #6]
-; CHECK: mov [[REG7:w[0-9]+]], #21587
-; CHECK: strh [[REG7]], [x0, #4]
+; CHECK: mov [[REG7:w[0-9]+]], #21337
+; CHECK: movk [[REG7]],
+; CHECK: stur [[REG7]], [x0, #3]
; CHECK: mov [[REG8:w[0-9]+]],
; CHECK: movk [[REG8]],
; CHECK: str [[REG8]], [x0]
Modified: llvm/trunk/test/CodeGen/ARM/memcpy-inline.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/memcpy-inline.ll?rev=349016&r1=349015&r2=349016&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/memcpy-inline.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/memcpy-inline.ll Thu Dec 13 01:56:19 2018
@@ -86,10 +86,9 @@ entry:
define void @t5(i8* nocapture %C) nounwind {
entry:
; CHECK-LABEL: t5:
-; CHECK: movs [[REG5:r[0-9]+]], #0
-; CHECK: strb [[REG5]], [r0, #6]
-; CHECK: movw [[REG6:r[0-9]+]], #21587
-; CHECK: strh [[REG6]], [r0, #4]
+; CHECK: movw [[REG5:r[0-9]+]], #21337
+; CHECK: movt [[REG5]], #84
+; CHECK: str.w [[REG5]], [r0, #3]
; CHECK: movw [[REG7:r[0-9]+]], #18500
; CHECK: movt [[REG7:r[0-9]+]], #22866
; CHECK: str [[REG7]]
Modified: llvm/trunk/test/CodeGen/ARM/memcpy-ldm-stm.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/memcpy-ldm-stm.ll?rev=349016&r1=349015&r2=349016&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/memcpy-ldm-stm.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/memcpy-ldm-stm.ll Thu Dec 13 01:56:19 2018
@@ -34,14 +34,16 @@ entry:
; CHECK-LABEL: t2:
; CHECKV6: ldr [[LB:r[0-7]]],
; CHECKV6-NEXT: ldr [[SB:r[0-7]]],
+; CHECKV6-NEXT: ldm{{(\.w)?}} [[LB]]!,
+; CHECKV6-NEXT: stm{{(\.w)?}} [[SB]]!,
+; CHECKV6-NEXT: ldrh{{(\.w)?}} {{.*}}, {{\[}}[[LB]]]
+; CHECKV6-NEXT: ldrb{{(\.w)?}} {{.*}}, {{\[}}[[LB]], #2]
+; CHECKV6-NEXT: strb{{(\.w)?}} {{.*}}, {{\[}}[[SB]], #2]
+; CHECKV6-NEXT: strh{{(\.w)?}} {{.*}}, {{\[}}[[SB]]]
; CHECKV7: movt [[LB:[rl0-9]+]], :upper16:d
; CHECKV7-NEXT: movt [[SB:[rl0-9]+]], :upper16:s
-; CHECK-NEXT: ldm{{(\.w)?}} [[LB]]!,
-; CHECK-NEXT: stm{{(\.w)?}} [[SB]]!,
-; CHECK-NEXT: ldrh{{(\.w)?}} {{.*}}, {{\[}}[[LB]]]
-; CHECK-NEXT: ldrb{{(\.w)?}} {{.*}}, {{\[}}[[LB]], #2]
-; CHECK-NEXT: strb{{(\.w)?}} {{.*}}, {{\[}}[[SB]], #2]
-; CHECK-NEXT: strh{{(\.w)?}} {{.*}}, {{\[}}[[SB]]]
+; CHECKV7: ldr{{(\.w)?}} {{.*}}, {{\[}}[[LB]], #11]
+; CHECKV7-NEXT: str{{(\.w)?}} {{.*}}, {{\[}}[[SB]], #11]
tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 bitcast ([64 x i32]* @s to i8*), i8* align 4 bitcast ([64 x i32]* @d to i8*), i32 15, i1 false)
ret void
}
Modified: llvm/trunk/test/CodeGen/PowerPC/jaggedstructs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/jaggedstructs.ll?rev=349016&r1=349015&r2=349016&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/jaggedstructs.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/jaggedstructs.ll Thu Dec 13 01:56:19 2018
@@ -34,11 +34,9 @@ entry:
; CHECK-DAG: lwz {{[0-9]+}}, 178(1)
; CHECK-DAG: sth {{[0-9]+}}, 70(1)
; CHECK-DAG: stw {{[0-9]+}}, 66(1)
-; CHECK-DAG: lbz {{[0-9]+}}, 191(1)
-; CHECK-DAG: lhz {{[0-9]+}}, 189(1)
+; CHECK-DAG: lwz {{[0-9]+}}, 188(1)
; CHECK-DAG: lwz {{[0-9]+}}, 185(1)
-; CHECK-DAG: stb {{[0-9]+}}, 79(1)
-; CHECK-DAG: sth {{[0-9]+}}, 77(1)
+; CHECK-DAG: stw {{[0-9]+}}, 76(1)
; CHECK-DAG: stw {{[0-9]+}}, 73(1)
; CHECK-DAG: ld 6, 72(1)
; CHECK-DAG: ld 5, 64(1)
Modified: llvm/trunk/test/CodeGen/PowerPC/structsinmem.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/structsinmem.ll?rev=349016&r1=349015&r2=349016&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/structsinmem.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/structsinmem.ll Thu Dec 13 01:56:19 2018
@@ -157,8 +157,7 @@ entry:
; CHECK: stw {{[0-9]+}}, 147(1)
; CHECK: sth {{[0-9]+}}, 158(1)
; CHECK: stw {{[0-9]+}}, 154(1)
-; CHECK: stb {{[0-9]+}}, 167(1)
-; CHECK: sth {{[0-9]+}}, 165(1)
+; CHECK: stw {{[0-9]+}}, 164(1)
; CHECK: stw {{[0-9]+}}, 161(1)
}
Modified: llvm/trunk/test/CodeGen/PowerPC/structsinregs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/structsinregs.ll?rev=349016&r1=349015&r2=349016&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/structsinregs.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/structsinregs.ll Thu Dec 13 01:56:19 2018
@@ -148,8 +148,7 @@ entry:
; CHECK: stw {{[0-9]+}}, 83(1)
; CHECK: sth {{[0-9]+}}, 94(1)
; CHECK: stw {{[0-9]+}}, 90(1)
-; CHECK: stb {{[0-9]+}}, 103(1)
-; CHECK: sth {{[0-9]+}}, 101(1)
+; CHECK: stw {{[0-9]+}}, 100(1)
; CHECK: stw {{[0-9]+}}, 97(1)
; CHECK: ld 9, 96(1)
; CHECK: ld 8, 88(1)
Modified: llvm/trunk/test/CodeGen/X86/memcpy-from-string.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/memcpy-from-string.ll?rev=349016&r1=349015&r2=349016&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/memcpy-from-string.ll (original)
+++ llvm/trunk/test/CodeGen/X86/memcpy-from-string.ll Thu Dec 13 01:56:19 2018
@@ -16,8 +16,7 @@ target triple = "x86_64-unknown-linux-gn
define void @foo(i8* %tmp2) {
; X86-LABEL: foo:
; X86: # %bb.0:
-; X86-NEXT: movb $0, 6(%rdi)
-; X86-NEXT: movw $15212, 4(%rdi) # imm = 0x3B6C
+; X86-NEXT: movl $3894379, 3(%rdi) # imm = 0x3B6C6B
; X86-NEXT: movl $1802117222, (%rdi) # imm = 0x6B6A2066
; X86-NEXT: retq
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp2, i8* getelementptr inbounds ([10 x i8], [10 x i8]* @0, i64 0, i64 3), i64 7, i1 false)
Modified: llvm/trunk/test/CodeGen/X86/memset-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/memset-2.ll?rev=349016&r1=349015&r2=349016&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/memset-2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/memset-2.ll Thu Dec 13 01:56:19 2018
@@ -51,11 +51,10 @@ define void @t4(i8* nocapture %s, i8 %a)
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: imull $16843009, %ecx, %ecx ## imm = 0x1010101
+; CHECK-NEXT: movl %ecx, 11(%eax)
; CHECK-NEXT: movl %ecx, 8(%eax)
; CHECK-NEXT: movl %ecx, 4(%eax)
; CHECK-NEXT: movl %ecx, (%eax)
-; CHECK-NEXT: movw %cx, 12(%eax)
-; CHECK-NEXT: movb %cl, 14(%eax)
; CHECK-NEXT: retl
entry:
tail call void @llvm.memset.p0i8.i32(i8* %s, i8 %a, i32 15, i1 false)
Modified: llvm/trunk/test/CodeGen/X86/memset-zero.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/memset-zero.ll?rev=349016&r1=349015&r2=349016&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/memset-zero.ll (original)
+++ llvm/trunk/test/CodeGen/X86/memset-zero.ll Thu Dec 13 01:56:19 2018
@@ -71,22 +71,19 @@ define void @memset_7(i8* %a) nounwind
; X86-LABEL: memset_7:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb $0, 6(%eax)
-; X86-NEXT: movw $0, 4(%eax)
+; X86-NEXT: movl $0, 3(%eax)
; X86-NEXT: movl $0, (%eax)
; X86-NEXT: retl
;
; CORE2-LABEL: memset_7:
; CORE2: # %bb.0: # %entry
-; CORE2-NEXT: movb $0, 6(%rdi)
-; CORE2-NEXT: movw $0, 4(%rdi)
+; CORE2-NEXT: movl $0, 3(%rdi)
; CORE2-NEXT: movl $0, (%rdi)
; CORE2-NEXT: retq
;
; NEHALEM-LABEL: memset_7:
; NEHALEM: # %bb.0: # %entry
-; NEHALEM-NEXT: movb $0, 6(%rdi)
-; NEHALEM-NEXT: movw $0, 4(%rdi)
+; NEHALEM-NEXT: movl $0, 3(%rdi)
; NEHALEM-NEXT: movl $0, (%rdi)
; NEHALEM-NEXT: retq
entry:
@@ -120,23 +117,20 @@ define void @memset_11(i8* %a) nounwind
; X86-LABEL: memset_11:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb $0, 10(%eax)
-; X86-NEXT: movw $0, 8(%eax)
+; X86-NEXT: movl $0, 7(%eax)
; X86-NEXT: movl $0, 4(%eax)
; X86-NEXT: movl $0, (%eax)
; X86-NEXT: retl
;
; CORE2-LABEL: memset_11:
; CORE2: # %bb.0: # %entry
-; CORE2-NEXT: movb $0, 10(%rdi)
-; CORE2-NEXT: movw $0, 8(%rdi)
+; CORE2-NEXT: movl $0, 7(%rdi)
; CORE2-NEXT: movq $0, (%rdi)
; CORE2-NEXT: retq
;
; NEHALEM-LABEL: memset_11:
; NEHALEM: # %bb.0: # %entry
-; NEHALEM-NEXT: movb $0, 10(%rdi)
-; NEHALEM-NEXT: movw $0, 8(%rdi)
+; NEHALEM-NEXT: movl $0, 7(%rdi)
; NEHALEM-NEXT: movq $0, (%rdi)
; NEHALEM-NEXT: retq
entry:
@@ -174,8 +168,7 @@ define void @memset_15(i8* %a) nounwind
; X86-LABEL: memset_15:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb $0, 14(%eax)
-; X86-NEXT: movw $0, 12(%eax)
+; X86-NEXT: movl $0, 11(%eax)
; X86-NEXT: movl $0, 8(%eax)
; X86-NEXT: movl $0, 4(%eax)
; X86-NEXT: movl $0, (%eax)
@@ -256,8 +249,7 @@ define void @memset_19(i8* %a) nounwind
; X86-LABEL: memset_19:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb $0, 18(%eax)
-; X86-NEXT: movw $0, 16(%eax)
+; X86-NEXT: movl $0, 15(%eax)
; X86-NEXT: movl $0, 12(%eax)
; X86-NEXT: movl $0, 8(%eax)
; X86-NEXT: movl $0, 4(%eax)
@@ -266,8 +258,7 @@ define void @memset_19(i8* %a) nounwind
;
; CORE2-LABEL: memset_19:
; CORE2: # %bb.0: # %entry
-; CORE2-NEXT: movb $0, 18(%rdi)
-; CORE2-NEXT: movw $0, 16(%rdi)
+; CORE2-NEXT: movl $0, 15(%rdi)
; CORE2-NEXT: movq $0, 8(%rdi)
; CORE2-NEXT: movq $0, (%rdi)
; CORE2-NEXT: retq
@@ -276,8 +267,7 @@ define void @memset_19(i8* %a) nounwind
; NEHALEM: # %bb.0: # %entry
; NEHALEM-NEXT: xorps %xmm0, %xmm0
; NEHALEM-NEXT: movups %xmm0, (%rdi)
-; NEHALEM-NEXT: movb $0, 18(%rdi)
-; NEHALEM-NEXT: movw $0, 16(%rdi)
+; NEHALEM-NEXT: movl $0, 15(%rdi)
; NEHALEM-NEXT: retq
entry:
call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 19, i1 false)
@@ -288,8 +278,7 @@ define void @memset_31(i8* %a) nounwind
; X86-LABEL: memset_31:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb $0, 30(%eax)
-; X86-NEXT: movw $0, 28(%eax)
+; X86-NEXT: movl $0, 27(%eax)
; X86-NEXT: movl $0, 24(%eax)
; X86-NEXT: movl $0, 20(%eax)
; X86-NEXT: movl $0, 16(%eax)
@@ -322,8 +311,7 @@ define void @memset_35(i8* %a) nounwind
; X86-LABEL: memset_35:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb $0, 34(%eax)
-; X86-NEXT: movw $0, 32(%eax)
+; X86-NEXT: movl $0, 31(%eax)
; X86-NEXT: movl $0, 28(%eax)
; X86-NEXT: movl $0, 24(%eax)
; X86-NEXT: movl $0, 20(%eax)
@@ -336,8 +324,7 @@ define void @memset_35(i8* %a) nounwind
;
; CORE2-LABEL: memset_35:
; CORE2: # %bb.0: # %entry
-; CORE2-NEXT: movb $0, 34(%rdi)
-; CORE2-NEXT: movw $0, 32(%rdi)
+; CORE2-NEXT: movl $0, 31(%rdi)
; CORE2-NEXT: movq $0, 24(%rdi)
; CORE2-NEXT: movq $0, 16(%rdi)
; CORE2-NEXT: movq $0, 8(%rdi)
@@ -349,8 +336,7 @@ define void @memset_35(i8* %a) nounwind
; NEHALEM-NEXT: xorps %xmm0, %xmm0
; NEHALEM-NEXT: movups %xmm0, 16(%rdi)
; NEHALEM-NEXT: movups %xmm0, (%rdi)
-; NEHALEM-NEXT: movb $0, 34(%rdi)
-; NEHALEM-NEXT: movw $0, 32(%rdi)
+; NEHALEM-NEXT: movl $0, 31(%rdi)
; NEHALEM-NEXT: retq
entry:
call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 35, i1 false)
Modified: llvm/trunk/test/CodeGen/X86/unaligned-load.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/unaligned-load.ll?rev=349016&r1=349015&r2=349016&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/unaligned-load.ll (original)
+++ llvm/trunk/test/CodeGen/X86/unaligned-load.ll Thu Dec 13 01:56:19 2018
@@ -6,21 +6,23 @@
@.str1 = internal constant [31 x i8] c"DHRYSTONE PROGRAM, SOME STRING\00", align 8
@.str3 = internal constant [31 x i8] c"DHRYSTONE PROGRAM, 2'ND STRING\00", align 8
+; This can be improved; see PR39952.
+
define void @func() nounwind ssp {
; I386-LABEL: func:
; I386: ## %bb.0: ## %entry
-; I386-NEXT: pushl %esi
-; I386-NEXT: subl $40, %esp
-; I386-NEXT: leal {{[0-9]+}}(%esp), %esi
+; I386-NEXT: subl $32, %esp
; I386-NEXT: .p2align 4, 0x90
; I386-NEXT: LBB0_1: ## %bb
; I386-NEXT: ## =>This Inner Loop Header: Depth=1
-; I386-NEXT: subl $4, %esp
-; I386-NEXT: pushl $31
-; I386-NEXT: pushl $_.str3
-; I386-NEXT: pushl %esi
-; I386-NEXT: calll _memcpy
-; I386-NEXT: addl $16, %esp
+; I386-NEXT: movl $4673097, {{[0-9]+}}(%esp) ## imm = 0x474E49
+; I386-NEXT: movl $1230132307, {{[0-9]+}}(%esp) ## imm = 0x49525453
+; I386-NEXT: movl $541347367, {{[0-9]+}}(%esp) ## imm = 0x20444E27
+; I386-NEXT: movl $840969293, {{[0-9]+}}(%esp) ## imm = 0x32202C4D
+; I386-NEXT: movl $1095911247, {{[0-9]+}}(%esp) ## imm = 0x4152474F
+; I386-NEXT: movl $1380982853, {{[0-9]+}}(%esp) ## imm = 0x52502045
+; I386-NEXT: movl $1313821779, {{[0-9]+}}(%esp) ## imm = 0x4E4F5453
+; I386-NEXT: movl $1498564676, (%esp) ## imm = 0x59524844
; I386-NEXT: jmp LBB0_1
;
; CORE2-LABEL: func:
@@ -53,6 +55,59 @@ entry:
br label %bb
bb: ; preds = %bb, %entry
+ %String2Loc9 = getelementptr inbounds [31 x i8], [31 x i8]* %String2Loc, i64 0, i64 0
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %String2Loc9, i8* getelementptr inbounds ([31 x i8], [31 x i8]* @.str3, i64 0, i64 0), i64 31, i1 false)
+ br label %bb
+
+return: ; No predecessors!
+ ret void
+}
+
+define void @func_aligned() nounwind ssp {
+; I386-LABEL: func_aligned:
+; I386: ## %bb.0: ## %entry
+; I386-NEXT: subl $44, %esp
+; I386-NEXT: movaps {{.*#+}} xmm0 = [1498564676,1313821779,1380982853,1095911247]
+; I386-NEXT: .p2align 4, 0x90
+; I386-NEXT: LBB1_1: ## %bb
+; I386-NEXT: ## =>This Inner Loop Header: Depth=1
+; I386-NEXT: movaps %xmm0, (%esp)
+; I386-NEXT: movl $4673097, {{[0-9]+}}(%esp) ## imm = 0x474E49
+; I386-NEXT: movl $1230132307, {{[0-9]+}}(%esp) ## imm = 0x49525453
+; I386-NEXT: movl $541347367, {{[0-9]+}}(%esp) ## imm = 0x20444E27
+; I386-NEXT: movl $840969293, {{[0-9]+}}(%esp) ## imm = 0x32202C4D
+; I386-NEXT: jmp LBB1_1
+;
+; CORE2-LABEL: func_aligned:
+; CORE2: ## %bb.0: ## %entry
+; CORE2-NEXT: movabsq $20070800167293728, %rax ## imm = 0x474E4952545320
+; CORE2-NEXT: movabsq $2325069237881678925, %rcx ## imm = 0x20444E2732202C4D
+; CORE2-NEXT: movabsq $4706902966564560965, %rdx ## imm = 0x4152474F52502045
+; CORE2-NEXT: movabsq $5642821575076104260, %rsi ## imm = 0x4E4F545359524844
+; CORE2-NEXT: .p2align 4, 0x90
+; CORE2-NEXT: LBB1_1: ## %bb
+; CORE2-NEXT: ## =>This Inner Loop Header: Depth=1
+; CORE2-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
+; CORE2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
+; CORE2-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
+; CORE2-NEXT: movq %rsi, -{{[0-9]+}}(%rsp)
+; CORE2-NEXT: jmp LBB1_1
+;
+; COREI7-LABEL: func_aligned:
+; COREI7: ## %bb.0: ## %entry
+; COREI7-NEXT: movups _.str3+{{.*}}(%rip), %xmm0
+; COREI7-NEXT: movups {{.*}}(%rip), %xmm1
+; COREI7-NEXT: .p2align 4, 0x90
+; COREI7-NEXT: LBB1_1: ## %bb
+; COREI7-NEXT: ## =>This Inner Loop Header: Depth=1
+; COREI7-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp)
+; COREI7-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
+; COREI7-NEXT: jmp LBB1_1
+entry:
+ %String2Loc = alloca [31 x i8], align 16
+ br label %bb
+
+bb: ; preds = %bb, %entry
%String2Loc9 = getelementptr inbounds [31 x i8], [31 x i8]* %String2Loc, i64 0, i64 0
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %String2Loc9, i8* getelementptr inbounds ([31 x i8], [31 x i8]* @.str3, i64 0, i64 0), i64 31, i1 false)
br label %bb
More information about the llvm-commits
mailing list