[llvm] r321934 - [x86, MemCmpExpansion] allow 2 pairs of loads per block (PR33325)
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Sat Jan 6 08:16:05 PST 2018
Author: spatel
Date: Sat Jan 6 08:16:04 2018
New Revision: 321934
URL: http://llvm.org/viewvc/llvm-project?rev=321934&view=rev
Log:
[x86, MemCmpExpansion] allow 2 pairs of loads per block (PR33325)
This is the last step needed to fix PR33325:
https://bugs.llvm.org/show_bug.cgi?id=33325
We're trading branches and compares for loads and logic ops.
This makes the code smaller and hopefully faster in most cases.
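To illustrate the trade, here is a minimal C++ sketch of the idea for an
8-byte equality memcmp (not the pass's literal output; the function names
and memcpy-based loads are illustrative):

  #include <cstdint>
  #include <cstring>

  // Before: one load pair per block, with an early-out branch between blocks.
  bool eq8_branchy(const char *x, const char *y) {
    uint32_t a0, b0, a1, b1;
    std::memcpy(&a0, x, 4);
    std::memcpy(&b0, y, 4);
    if (a0 != b0)
      return false;               // branch + compare
    std::memcpy(&a1, x + 4, 4);
    std::memcpy(&b1, y + 4, 4);
    return a1 == b1;
  }

  // After: both load pairs in one block; differences are collected with
  // xor/or and tested once, so there is a single compare and no branch.
  bool eq8_branchless(const char *x, const char *y) {
    uint32_t a0, b0, a1, b1;
    std::memcpy(&a0, x, 4);
    std::memcpy(&b0, y, 4);
    std::memcpy(&a1, x + 4, 4);
    std::memcpy(&b1, y + 4, 4);
    return ((a0 ^ b0) | (a1 ^ b1)) == 0;
  }

This mirrors the IR in the updated cmp_eq8 test below: xor each load pair,
or the results together, and compare once against zero.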
The 24-byte test shows an interesting construct: we load the trailing scalar
elements into vector registers and generate the same pcmpeq+movmsk code that
we would expect for a pair of full vector loads (see the 32- and 64-byte tests).
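For reference, a hedged SSE2 intrinsics sketch of that construct (again
illustrative, not the backend's literal output): the 8-byte tail is loaded
into a vector register with a movq-style load, compared with pcmpeqb, and
folded into the full 16-byte comparison with pand before a single pmovmskb
test. The zeroed upper lanes of the two tail loads compare equal, so they
don't disturb the 0xFFFF mask check.

  #include <emmintrin.h>

  bool eq24(const char *x, const char *y) {
    __m128i a  = _mm_loadu_si128((const __m128i *)x);        // bytes 0..15
    __m128i b  = _mm_loadu_si128((const __m128i *)y);
    __m128i ta = _mm_loadl_epi64((const __m128i *)(x + 16)); // bytes 16..23
    __m128i tb = _mm_loadl_epi64((const __m128i *)(y + 16)); // upper 8 bytes zeroed
    __m128i eq = _mm_and_si128(_mm_cmpeq_epi8(a, b),
                               _mm_cmpeq_epi8(ta, tb));
    return _mm_movemask_epi8(eq) == 0xFFFF;                  // all 16 lanes equal
  }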
Differential Revision: https://reviews.llvm.org/D41714
Modified:
llvm/trunk/lib/CodeGen/ExpandMemCmp.cpp
llvm/trunk/lib/Target/X86/X86ISelLowering.h
llvm/trunk/test/CodeGen/X86/memcmp-optsize.ll
llvm/trunk/test/CodeGen/X86/memcmp.ll
llvm/trunk/test/Transforms/ExpandMemCmp/X86/memcmp.ll
Modified: llvm/trunk/lib/CodeGen/ExpandMemCmp.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/ExpandMemCmp.cpp?rev=321934&r1=321933&r2=321934&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/ExpandMemCmp.cpp (original)
+++ llvm/trunk/lib/CodeGen/ExpandMemCmp.cpp Sat Jan 6 08:16:04 2018
@@ -564,12 +564,8 @@ Value *MemCmpExpansion::getMemCmpOneBloc
// This function expands the memcmp call into an inline expansion and returns
// the memcmp result.
Value *MemCmpExpansion::getMemCmpExpansion() {
- // A memcmp with zero-comparison with only one block of load and compare does
- // not need to set up any extra blocks. This case could be handled in the DAG,
- // but since we have all of the machinery to flexibly expand any memcpy here,
- // we choose to handle this case too to avoid fragmented lowering.
- if ((!IsUsedForZeroCmp && NumLoadsPerBlockForZeroCmp != 1) ||
- getNumBlocks() != 1) {
+ // Create the basic block framework for a multi-block expansion.
+ if (getNumBlocks() != 1) {
BasicBlock *StartBlock = CI->getParent();
EndBlock = StartBlock->splitBasicBlock(CI, "endblock");
setupEndBlockPHINodes();
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=321934&r1=321933&r2=321934&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Sat Jan 6 08:16:04 2018
@@ -829,6 +829,11 @@ namespace llvm {
/// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
MVT hasFastEqualityCompare(unsigned NumBits) const override;
+ /// Allow multiple load pairs per block for smaller and faster code.
+ unsigned getMemcmpEqZeroLoadsPerBlock() const override {
+ return 2;
+ }
+
/// Return the value type to use for ISD::SETCC.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
EVT VT) const override;
Modified: llvm/trunk/test/CodeGen/X86/memcmp-optsize.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/memcmp-optsize.ll?rev=321934&r1=321933&r2=321934&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/memcmp-optsize.ll (original)
+++ llvm/trunk/test/CodeGen/X86/memcmp-optsize.ll Sat Jan 6 08:16:04 2018
@@ -160,35 +160,22 @@ define i1 @length3_eq(i8* %X, i8* %Y) no
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzwl (%ecx), %edx
-; X86-NEXT: cmpw (%eax), %dx
-; X86-NEXT: jne .LBB5_2
-; X86-NEXT: # %bb.1: # %loadbb1
-; X86-NEXT: movb 2(%ecx), %dl
-; X86-NEXT: xorl %ecx, %ecx
-; X86-NEXT: cmpb 2(%eax), %dl
-; X86-NEXT: je .LBB5_3
-; X86-NEXT: .LBB5_2: # %res_block
-; X86-NEXT: xorl %ecx, %ecx
-; X86-NEXT: incl %ecx
-; X86-NEXT: .LBB5_3: # %endblock
-; X86-NEXT: testl %ecx, %ecx
+; X86-NEXT: xorw (%eax), %dx
+; X86-NEXT: movb 2(%ecx), %cl
+; X86-NEXT: xorb 2(%eax), %cl
+; X86-NEXT: movzbl %cl, %eax
+; X86-NEXT: orw %dx, %ax
; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: length3_eq:
; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax
-; X64-NEXT: cmpw (%rsi), %ax
-; X64-NEXT: jne .LBB5_2
-; X64-NEXT: # %bb.1: # %loadbb1
+; X64-NEXT: xorw (%rsi), %ax
; X64-NEXT: movb 2(%rdi), %cl
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpb 2(%rsi), %cl
-; X64-NEXT: je .LBB5_3
-; X64-NEXT: .LBB5_2: # %res_block
-; X64-NEXT: movl $1, %eax
-; X64-NEXT: .LBB5_3: # %endblock
-; X64-NEXT: testl %eax, %eax
+; X64-NEXT: xorb 2(%rsi), %cl
+; X64-NEXT: movzbl %cl, %ecx
+; X64-NEXT: orw %ax, %cx
; X64-NEXT: setne %al
; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 3) nounwind
@@ -318,35 +305,22 @@ define i1 @length5_eq(i8* %X, i8* %Y) no
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl (%ecx), %edx
-; X86-NEXT: cmpl (%eax), %edx
-; X86-NEXT: jne .LBB10_2
-; X86-NEXT: # %bb.1: # %loadbb1
-; X86-NEXT: movb 4(%ecx), %dl
-; X86-NEXT: xorl %ecx, %ecx
-; X86-NEXT: cmpb 4(%eax), %dl
-; X86-NEXT: je .LBB10_3
-; X86-NEXT: .LBB10_2: # %res_block
-; X86-NEXT: xorl %ecx, %ecx
-; X86-NEXT: incl %ecx
-; X86-NEXT: .LBB10_3: # %endblock
-; X86-NEXT: testl %ecx, %ecx
+; X86-NEXT: xorl (%eax), %edx
+; X86-NEXT: movb 4(%ecx), %cl
+; X86-NEXT: xorb 4(%eax), %cl
+; X86-NEXT: movzbl %cl, %eax
+; X86-NEXT: orl %edx, %eax
; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: length5_eq:
; X64: # %bb.0:
; X64-NEXT: movl (%rdi), %eax
-; X64-NEXT: cmpl (%rsi), %eax
-; X64-NEXT: jne .LBB10_2
-; X64-NEXT: # %bb.1: # %loadbb1
+; X64-NEXT: xorl (%rsi), %eax
; X64-NEXT: movb 4(%rdi), %cl
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpb 4(%rsi), %cl
-; X64-NEXT: je .LBB10_3
-; X64-NEXT: .LBB10_2: # %res_block
-; X64-NEXT: movl $1, %eax
-; X64-NEXT: .LBB10_3: # %endblock
-; X64-NEXT: testl %eax, %eax
+; X64-NEXT: xorb 4(%rsi), %cl
+; X64-NEXT: movzbl %cl, %ecx
+; X64-NEXT: orl %eax, %ecx
; X64-NEXT: setne %al
; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 5) nounwind
@@ -404,18 +378,10 @@ define i1 @length8_eq(i8* %X, i8* %Y) no
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl (%ecx), %edx
-; X86-NEXT: cmpl (%eax), %edx
-; X86-NEXT: jne .LBB12_2
-; X86-NEXT: # %bb.1: # %loadbb1
-; X86-NEXT: movl 4(%ecx), %edx
-; X86-NEXT: xorl %ecx, %ecx
-; X86-NEXT: cmpl 4(%eax), %edx
-; X86-NEXT: je .LBB12_3
-; X86-NEXT: .LBB12_2: # %res_block
-; X86-NEXT: xorl %ecx, %ecx
-; X86-NEXT: incl %ecx
-; X86-NEXT: .LBB12_3: # %endblock
-; X86-NEXT: testl %ecx, %ecx
+; X86-NEXT: movl 4(%ecx), %ecx
+; X86-NEXT: xorl (%eax), %edx
+; X86-NEXT: xorl 4(%eax), %ecx
+; X86-NEXT: orl %edx, %ecx
; X86-NEXT: sete %al
; X86-NEXT: retl
;
@@ -433,18 +399,12 @@ define i1 @length8_eq(i8* %X, i8* %Y) no
define i1 @length8_eq_const(i8* %X) nounwind optsize {
; X86-LABEL: length8_eq_const:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: cmpl $858927408, (%ecx) # imm = 0x33323130
-; X86-NEXT: jne .LBB13_2
-; X86-NEXT: # %bb.1: # %loadbb1
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpl $926299444, 4(%ecx) # imm = 0x37363534
-; X86-NEXT: je .LBB13_3
-; X86-NEXT: .LBB13_2: # %res_block
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: incl %eax
-; X86-NEXT: .LBB13_3: # %endblock
-; X86-NEXT: testl %eax, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl $858927408, %ecx # imm = 0x33323130
+; X86-NEXT: xorl (%eax), %ecx
+; X86-NEXT: movl $926299444, %edx # imm = 0x37363534
+; X86-NEXT: xorl 4(%eax), %edx
+; X86-NEXT: orl %ecx, %edx
; X86-NEXT: setne %al
; X86-NEXT: retl
;
@@ -475,17 +435,10 @@ define i1 @length12_eq(i8* %X, i8* %Y) n
; X64-LABEL: length12_eq:
; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rax
-; X64-NEXT: cmpq (%rsi), %rax
-; X64-NEXT: jne .LBB14_2
-; X64-NEXT: # %bb.1: # %loadbb1
+; X64-NEXT: xorq (%rsi), %rax
; X64-NEXT: movl 8(%rdi), %ecx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpl 8(%rsi), %ecx
-; X64-NEXT: je .LBB14_3
-; X64-NEXT: .LBB14_2: # %res_block
-; X64-NEXT: movl $1, %eax
-; X64-NEXT: .LBB14_3: # %endblock
-; X64-NEXT: testl %eax, %eax
+; X64-NEXT: xorl 8(%rsi), %ecx
+; X64-NEXT: orq %rax, %rcx
; X64-NEXT: setne %al
; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 12) nounwind
@@ -703,37 +656,25 @@ define i1 @length24_eq(i8* %x, i8* %y) n
; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
; X64-SSE2-NEXT: movdqu (%rsi), %xmm1
; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
-; X64-SSE2-NEXT: pmovmskb %xmm1, %eax
+; X64-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; X64-SSE2-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
+; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
+; X64-SSE2-NEXT: pand %xmm1, %xmm2
+; X64-SSE2-NEXT: pmovmskb %xmm2, %eax
; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X64-SSE2-NEXT: jne .LBB20_2
-; X64-SSE2-NEXT: # %bb.1: # %loadbb1
-; X64-SSE2-NEXT: movq 16(%rdi), %rcx
-; X64-SSE2-NEXT: xorl %eax, %eax
-; X64-SSE2-NEXT: cmpq 16(%rsi), %rcx
-; X64-SSE2-NEXT: je .LBB20_3
-; X64-SSE2-NEXT: .LBB20_2: # %res_block
-; X64-SSE2-NEXT: movl $1, %eax
-; X64-SSE2-NEXT: .LBB20_3: # %endblock
-; X64-SSE2-NEXT: testl %eax, %eax
; X64-SSE2-NEXT: sete %al
; X64-SSE2-NEXT: retq
;
; X64-AVX2-LABEL: length24_eq:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vmovdqu (%rdi), %xmm0
+; X64-AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+; X64-AVX2-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
+; X64-AVX2-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
; X64-AVX2-NEXT: vpcmpeqb (%rsi), %xmm0, %xmm0
+; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpmovmskb %xmm0, %eax
; X64-AVX2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X64-AVX2-NEXT: jne .LBB20_2
-; X64-AVX2-NEXT: # %bb.1: # %loadbb1
-; X64-AVX2-NEXT: movq 16(%rdi), %rcx
-; X64-AVX2-NEXT: xorl %eax, %eax
-; X64-AVX2-NEXT: cmpq 16(%rsi), %rcx
-; X64-AVX2-NEXT: je .LBB20_3
-; X64-AVX2-NEXT: .LBB20_2: # %res_block
-; X64-AVX2-NEXT: movl $1, %eax
-; X64-AVX2-NEXT: .LBB20_3: # %endblock
-; X64-AVX2-NEXT: testl %eax, %eax
; X64-AVX2-NEXT: sete %al
; X64-AVX2-NEXT: retq
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 24) nounwind
@@ -757,38 +698,28 @@ define i1 @length24_eq_const(i8* %X) nou
; X64-SSE2-LABEL: length24_eq_const:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
+; X64-SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
+; X64-SSE2-NEXT: movabsq $3689065127958034230, %rax # imm = 0x3332313039383736
+; X64-SSE2-NEXT: movq %rax, %xmm2
+; X64-SSE2-NEXT: pcmpeqb %xmm1, %xmm2
; X64-SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm0
+; X64-SSE2-NEXT: pand %xmm2, %xmm0
; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X64-SSE2-NEXT: jne .LBB21_2
-; X64-SSE2-NEXT: # %bb.1: # %loadbb1
-; X64-SSE2-NEXT: xorl %eax, %eax
-; X64-SSE2-NEXT: movabsq $3689065127958034230, %rcx # imm = 0x3332313039383736
-; X64-SSE2-NEXT: cmpq %rcx, 16(%rdi)
-; X64-SSE2-NEXT: je .LBB21_3
-; X64-SSE2-NEXT: .LBB21_2: # %res_block
-; X64-SSE2-NEXT: movl $1, %eax
-; X64-SSE2-NEXT: .LBB21_3: # %endblock
-; X64-SSE2-NEXT: testl %eax, %eax
; X64-SSE2-NEXT: setne %al
; X64-SSE2-NEXT: retq
;
; X64-AVX2-LABEL: length24_eq_const:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vmovdqu (%rdi), %xmm0
+; X64-AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+; X64-AVX2-NEXT: movabsq $3689065127958034230, %rax # imm = 0x3332313039383736
+; X64-AVX2-NEXT: vmovq %rax, %xmm2
+; X64-AVX2-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
; X64-AVX2-NEXT: vpcmpeqb {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpmovmskb %xmm0, %eax
; X64-AVX2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X64-AVX2-NEXT: jne .LBB21_2
-; X64-AVX2-NEXT: # %bb.1: # %loadbb1
-; X64-AVX2-NEXT: xorl %eax, %eax
-; X64-AVX2-NEXT: movabsq $3689065127958034230, %rcx # imm = 0x3332313039383736
-; X64-AVX2-NEXT: cmpq %rcx, 16(%rdi)
-; X64-AVX2-NEXT: je .LBB21_3
-; X64-AVX2-NEXT: .LBB21_2: # %res_block
-; X64-AVX2-NEXT: movl $1, %eax
-; X64-AVX2-NEXT: .LBB21_3: # %endblock
-; X64-AVX2-NEXT: testl %eax, %eax
; X64-AVX2-NEXT: setne %al
; X64-AVX2-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 24) nounwind
@@ -835,47 +766,28 @@ define i1 @length32_eq(i8* %x, i8* %y) n
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT: movdqu (%ecx), %xmm0
-; X86-SSE2-NEXT: movdqu (%eax), %xmm1
-; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
-; X86-SSE2-NEXT: pmovmskb %xmm1, %edx
-; X86-SSE2-NEXT: cmpl $65535, %edx # imm = 0xFFFF
-; X86-SSE2-NEXT: jne .LBB23_2
-; X86-SSE2-NEXT: # %bb.1: # %loadbb1
-; X86-SSE2-NEXT: movdqu 16(%ecx), %xmm0
-; X86-SSE2-NEXT: movdqu 16(%eax), %xmm1
-; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
-; X86-SSE2-NEXT: pmovmskb %xmm1, %ecx
-; X86-SSE2-NEXT: xorl %eax, %eax
-; X86-SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
-; X86-SSE2-NEXT: je .LBB23_3
-; X86-SSE2-NEXT: .LBB23_2: # %res_block
-; X86-SSE2-NEXT: xorl %eax, %eax
-; X86-SSE2-NEXT: incl %eax
-; X86-SSE2-NEXT: .LBB23_3: # %endblock
-; X86-SSE2-NEXT: testl %eax, %eax
+; X86-SSE2-NEXT: movdqu 16(%ecx), %xmm1
+; X86-SSE2-NEXT: movdqu (%eax), %xmm2
+; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
+; X86-SSE2-NEXT: movdqu 16(%eax), %xmm0
+; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
+; X86-SSE2-NEXT: pand %xmm2, %xmm0
+; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
+; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X86-SSE2-NEXT: sete %al
; X86-SSE2-NEXT: retl
;
; X64-SSE2-LABEL: length32_eq:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE2-NEXT: movdqu (%rsi), %xmm1
-; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
-; X64-SSE2-NEXT: pmovmskb %xmm1, %eax
+; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1
+; X64-SSE2-NEXT: movdqu (%rsi), %xmm2
+; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
+; X64-SSE2-NEXT: movdqu 16(%rsi), %xmm0
+; X64-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
+; X64-SSE2-NEXT: pand %xmm2, %xmm0
+; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X64-SSE2-NEXT: jne .LBB23_2
-; X64-SSE2-NEXT: # %bb.1: # %loadbb1
-; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm0
-; X64-SSE2-NEXT: movdqu 16(%rsi), %xmm1
-; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
-; X64-SSE2-NEXT: pmovmskb %xmm1, %ecx
-; X64-SSE2-NEXT: xorl %eax, %eax
-; X64-SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
-; X64-SSE2-NEXT: je .LBB23_3
-; X64-SSE2-NEXT: .LBB23_2: # %res_block
-; X64-SSE2-NEXT: movl $1, %eax
-; X64-SSE2-NEXT: .LBB23_3: # %endblock
-; X64-SSE2-NEXT: testl %eax, %eax
; X64-SSE2-NEXT: sete %al
; X64-SSE2-NEXT: retq
;
@@ -910,43 +822,24 @@ define i1 @length32_eq_const(i8* %X) nou
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: movdqu (%eax), %xmm0
+; X86-SSE2-NEXT: movdqu 16(%eax), %xmm1
+; X86-SSE2-NEXT: pcmpeqb {{\.LCPI.*}}, %xmm1
; X86-SSE2-NEXT: pcmpeqb {{\.LCPI.*}}, %xmm0
-; X86-SSE2-NEXT: pmovmskb %xmm0, %ecx
-; X86-SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
-; X86-SSE2-NEXT: jne .LBB24_2
-; X86-SSE2-NEXT: # %bb.1: # %loadbb1
-; X86-SSE2-NEXT: movdqu 16(%eax), %xmm0
-; X86-SSE2-NEXT: pcmpeqb {{\.LCPI.*}}, %xmm0
-; X86-SSE2-NEXT: pmovmskb %xmm0, %ecx
-; X86-SSE2-NEXT: xorl %eax, %eax
-; X86-SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
-; X86-SSE2-NEXT: je .LBB24_3
-; X86-SSE2-NEXT: .LBB24_2: # %res_block
-; X86-SSE2-NEXT: xorl %eax, %eax
-; X86-SSE2-NEXT: incl %eax
-; X86-SSE2-NEXT: .LBB24_3: # %endblock
-; X86-SSE2-NEXT: testl %eax, %eax
+; X86-SSE2-NEXT: pand %xmm1, %xmm0
+; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
+; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X86-SSE2-NEXT: setne %al
; X86-SSE2-NEXT: retl
;
; X64-SSE2-LABEL: length32_eq_const:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
+; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1
+; X64-SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm1
; X64-SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm0
+; X64-SSE2-NEXT: pand %xmm1, %xmm0
; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X64-SSE2-NEXT: jne .LBB24_2
-; X64-SSE2-NEXT: # %bb.1: # %loadbb1
-; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm0
-; X64-SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm0
-; X64-SSE2-NEXT: pmovmskb %xmm0, %ecx
-; X64-SSE2-NEXT: xorl %eax, %eax
-; X64-SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
-; X64-SSE2-NEXT: je .LBB24_3
-; X64-SSE2-NEXT: .LBB24_2: # %res_block
-; X64-SSE2-NEXT: movl $1, %eax
-; X64-SSE2-NEXT: .LBB24_3: # %endblock
-; X64-SSE2-NEXT: testl %eax, %eax
; X64-SSE2-NEXT: setne %al
; X64-SSE2-NEXT: retq
;
@@ -1009,21 +902,12 @@ define i1 @length64_eq(i8* %x, i8* %y) n
; X64-AVX2-LABEL: length64_eq:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
+; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1
+; X64-AVX2-NEXT: vpcmpeqb 32(%rsi), %ymm1, %ymm1
; X64-AVX2-NEXT: vpcmpeqb (%rsi), %ymm0, %ymm0
+; X64-AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: vpmovmskb %ymm0, %eax
; X64-AVX2-NEXT: cmpl $-1, %eax
-; X64-AVX2-NEXT: jne .LBB26_2
-; X64-AVX2-NEXT: # %bb.1: # %loadbb1
-; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm0
-; X64-AVX2-NEXT: vpcmpeqb 32(%rsi), %ymm0, %ymm0
-; X64-AVX2-NEXT: vpmovmskb %ymm0, %ecx
-; X64-AVX2-NEXT: xorl %eax, %eax
-; X64-AVX2-NEXT: cmpl $-1, %ecx
-; X64-AVX2-NEXT: je .LBB26_3
-; X64-AVX2-NEXT: .LBB26_2: # %res_block
-; X64-AVX2-NEXT: movl $1, %eax
-; X64-AVX2-NEXT: .LBB26_3: # %endblock
-; X64-AVX2-NEXT: testl %eax, %eax
; X64-AVX2-NEXT: setne %al
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
@@ -1059,21 +943,12 @@ define i1 @length64_eq_const(i8* %X) nou
; X64-AVX2-LABEL: length64_eq_const:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
+; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1
+; X64-AVX2-NEXT: vpcmpeqb {{.*}}(%rip), %ymm1, %ymm1
; X64-AVX2-NEXT: vpcmpeqb {{.*}}(%rip), %ymm0, %ymm0
+; X64-AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: vpmovmskb %ymm0, %eax
; X64-AVX2-NEXT: cmpl $-1, %eax
-; X64-AVX2-NEXT: jne .LBB27_2
-; X64-AVX2-NEXT: # %bb.1: # %loadbb1
-; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm0
-; X64-AVX2-NEXT: vpcmpeqb {{.*}}(%rip), %ymm0, %ymm0
-; X64-AVX2-NEXT: vpmovmskb %ymm0, %ecx
-; X64-AVX2-NEXT: xorl %eax, %eax
-; X64-AVX2-NEXT: cmpl $-1, %ecx
-; X64-AVX2-NEXT: je .LBB27_3
-; X64-AVX2-NEXT: .LBB27_2: # %res_block
-; X64-AVX2-NEXT: movl $1, %eax
-; X64-AVX2-NEXT: .LBB27_3: # %endblock
-; X64-AVX2-NEXT: testl %eax, %eax
; X64-AVX2-NEXT: sete %al
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
Modified: llvm/trunk/test/CodeGen/X86/memcmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/memcmp.ll?rev=321934&r1=321933&r2=321934&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/memcmp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/memcmp.ll Sat Jan 6 08:16:04 2018
@@ -191,34 +191,22 @@ define i1 @length3_eq(i8* %X, i8* %Y) no
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzwl (%ecx), %edx
-; X86-NEXT: cmpw (%eax), %dx
-; X86-NEXT: jne .LBB7_2
-; X86-NEXT: # %bb.1: # %loadbb1
-; X86-NEXT: movb 2(%ecx), %dl
-; X86-NEXT: xorl %ecx, %ecx
-; X86-NEXT: cmpb 2(%eax), %dl
-; X86-NEXT: je .LBB7_3
-; X86-NEXT: .LBB7_2: # %res_block
-; X86-NEXT: movl $1, %ecx
-; X86-NEXT: .LBB7_3: # %endblock
-; X86-NEXT: testl %ecx, %ecx
+; X86-NEXT: xorw (%eax), %dx
+; X86-NEXT: movb 2(%ecx), %cl
+; X86-NEXT: xorb 2(%eax), %cl
+; X86-NEXT: movzbl %cl, %eax
+; X86-NEXT: orw %dx, %ax
; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: length3_eq:
; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax
-; X64-NEXT: cmpw (%rsi), %ax
-; X64-NEXT: jne .LBB7_2
-; X64-NEXT: # %bb.1: # %loadbb1
+; X64-NEXT: xorw (%rsi), %ax
; X64-NEXT: movb 2(%rdi), %cl
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpb 2(%rsi), %cl
-; X64-NEXT: je .LBB7_3
-; X64-NEXT: .LBB7_2: # %res_block
-; X64-NEXT: movl $1, %eax
-; X64-NEXT: .LBB7_3: # %endblock
-; X64-NEXT: testl %eax, %eax
+; X64-NEXT: xorb 2(%rsi), %cl
+; X64-NEXT: movzbl %cl, %ecx
+; X64-NEXT: orw %ax, %cx
; X64-NEXT: setne %al
; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 3) nounwind
@@ -348,34 +336,22 @@ define i1 @length5_eq(i8* %X, i8* %Y) no
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl (%ecx), %edx
-; X86-NEXT: cmpl (%eax), %edx
-; X86-NEXT: jne .LBB12_2
-; X86-NEXT: # %bb.1: # %loadbb1
-; X86-NEXT: movb 4(%ecx), %dl
-; X86-NEXT: xorl %ecx, %ecx
-; X86-NEXT: cmpb 4(%eax), %dl
-; X86-NEXT: je .LBB12_3
-; X86-NEXT: .LBB12_2: # %res_block
-; X86-NEXT: movl $1, %ecx
-; X86-NEXT: .LBB12_3: # %endblock
-; X86-NEXT: testl %ecx, %ecx
+; X86-NEXT: xorl (%eax), %edx
+; X86-NEXT: movb 4(%ecx), %cl
+; X86-NEXT: xorb 4(%eax), %cl
+; X86-NEXT: movzbl %cl, %eax
+; X86-NEXT: orl %edx, %eax
; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: length5_eq:
; X64: # %bb.0:
; X64-NEXT: movl (%rdi), %eax
-; X64-NEXT: cmpl (%rsi), %eax
-; X64-NEXT: jne .LBB12_2
-; X64-NEXT: # %bb.1: # %loadbb1
+; X64-NEXT: xorl (%rsi), %eax
; X64-NEXT: movb 4(%rdi), %cl
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpb 4(%rsi), %cl
-; X64-NEXT: je .LBB12_3
-; X64-NEXT: .LBB12_2: # %res_block
-; X64-NEXT: movl $1, %eax
-; X64-NEXT: .LBB12_3: # %endblock
-; X64-NEXT: testl %eax, %eax
+; X64-NEXT: xorb 4(%rsi), %cl
+; X64-NEXT: movzbl %cl, %ecx
+; X64-NEXT: orl %eax, %ecx
; X64-NEXT: setne %al
; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 5) nounwind
@@ -433,17 +409,10 @@ define i1 @length8_eq(i8* %X, i8* %Y) no
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl (%ecx), %edx
-; X86-NEXT: cmpl (%eax), %edx
-; X86-NEXT: jne .LBB14_2
-; X86-NEXT: # %bb.1: # %loadbb1
-; X86-NEXT: movl 4(%ecx), %edx
-; X86-NEXT: xorl %ecx, %ecx
-; X86-NEXT: cmpl 4(%eax), %edx
-; X86-NEXT: je .LBB14_3
-; X86-NEXT: .LBB14_2: # %res_block
-; X86-NEXT: movl $1, %ecx
-; X86-NEXT: .LBB14_3: # %endblock
-; X86-NEXT: testl %ecx, %ecx
+; X86-NEXT: movl 4(%ecx), %ecx
+; X86-NEXT: xorl (%eax), %edx
+; X86-NEXT: xorl 4(%eax), %ecx
+; X86-NEXT: orl %edx, %ecx
; X86-NEXT: sete %al
; X86-NEXT: retl
;
@@ -461,17 +430,12 @@ define i1 @length8_eq(i8* %X, i8* %Y) no
define i1 @length8_eq_const(i8* %X) nounwind {
; X86-LABEL: length8_eq_const:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: cmpl $858927408, (%ecx) # imm = 0x33323130
-; X86-NEXT: jne .LBB15_2
-; X86-NEXT: # %bb.1: # %loadbb1
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpl $926299444, 4(%ecx) # imm = 0x37363534
-; X86-NEXT: je .LBB15_3
-; X86-NEXT: .LBB15_2: # %res_block
-; X86-NEXT: movl $1, %eax
-; X86-NEXT: .LBB15_3: # %endblock
-; X86-NEXT: testl %eax, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl $858927408, %ecx # imm = 0x33323130
+; X86-NEXT: xorl (%eax), %ecx
+; X86-NEXT: movl $926299444, %edx # imm = 0x37363534
+; X86-NEXT: xorl 4(%eax), %edx
+; X86-NEXT: orl %ecx, %edx
; X86-NEXT: setne %al
; X86-NEXT: retl
;
@@ -502,17 +466,10 @@ define i1 @length12_eq(i8* %X, i8* %Y) n
; X64-LABEL: length12_eq:
; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rax
-; X64-NEXT: cmpq (%rsi), %rax
-; X64-NEXT: jne .LBB16_2
-; X64-NEXT: # %bb.1: # %loadbb1
+; X64-NEXT: xorq (%rsi), %rax
; X64-NEXT: movl 8(%rdi), %ecx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpl 8(%rsi), %ecx
-; X64-NEXT: je .LBB16_3
-; X64-NEXT: .LBB16_2: # %res_block
-; X64-NEXT: movl $1, %eax
-; X64-NEXT: .LBB16_3: # %endblock
-; X64-NEXT: testl %eax, %eax
+; X64-NEXT: xorl 8(%rsi), %ecx
+; X64-NEXT: orq %rax, %rcx
; X64-NEXT: setne %al
; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 12) nounwind
@@ -754,37 +711,25 @@ define i1 @length24_eq(i8* %x, i8* %y) n
; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
; X64-SSE2-NEXT: movdqu (%rsi), %xmm1
; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
-; X64-SSE2-NEXT: pmovmskb %xmm1, %eax
+; X64-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; X64-SSE2-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
+; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
+; X64-SSE2-NEXT: pand %xmm1, %xmm2
+; X64-SSE2-NEXT: pmovmskb %xmm2, %eax
; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X64-SSE2-NEXT: jne .LBB22_2
-; X64-SSE2-NEXT: # %bb.1: # %loadbb1
-; X64-SSE2-NEXT: movq 16(%rdi), %rcx
-; X64-SSE2-NEXT: xorl %eax, %eax
-; X64-SSE2-NEXT: cmpq 16(%rsi), %rcx
-; X64-SSE2-NEXT: je .LBB22_3
-; X64-SSE2-NEXT: .LBB22_2: # %res_block
-; X64-SSE2-NEXT: movl $1, %eax
-; X64-SSE2-NEXT: .LBB22_3: # %endblock
-; X64-SSE2-NEXT: testl %eax, %eax
; X64-SSE2-NEXT: sete %al
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: length24_eq:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
+; X64-AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+; X64-AVX-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
+; X64-AVX-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
; X64-AVX-NEXT: vpcmpeqb (%rsi), %xmm0, %xmm0
+; X64-AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vpmovmskb %xmm0, %eax
; X64-AVX-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X64-AVX-NEXT: jne .LBB22_2
-; X64-AVX-NEXT: # %bb.1: # %loadbb1
-; X64-AVX-NEXT: movq 16(%rdi), %rcx
-; X64-AVX-NEXT: xorl %eax, %eax
-; X64-AVX-NEXT: cmpq 16(%rsi), %rcx
-; X64-AVX-NEXT: je .LBB22_3
-; X64-AVX-NEXT: .LBB22_2: # %res_block
-; X64-AVX-NEXT: movl $1, %eax
-; X64-AVX-NEXT: .LBB22_3: # %endblock
-; X64-AVX-NEXT: testl %eax, %eax
; X64-AVX-NEXT: sete %al
; X64-AVX-NEXT: retq
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 24) nounwind
@@ -808,38 +753,28 @@ define i1 @length24_eq_const(i8* %X) nou
; X64-SSE2-LABEL: length24_eq_const:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
+; X64-SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
+; X64-SSE2-NEXT: movabsq $3689065127958034230, %rax # imm = 0x3332313039383736
+; X64-SSE2-NEXT: movq %rax, %xmm2
+; X64-SSE2-NEXT: pcmpeqb %xmm1, %xmm2
; X64-SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm0
+; X64-SSE2-NEXT: pand %xmm2, %xmm0
; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X64-SSE2-NEXT: jne .LBB23_2
-; X64-SSE2-NEXT: # %bb.1: # %loadbb1
-; X64-SSE2-NEXT: xorl %eax, %eax
-; X64-SSE2-NEXT: movabsq $3689065127958034230, %rcx # imm = 0x3332313039383736
-; X64-SSE2-NEXT: cmpq %rcx, 16(%rdi)
-; X64-SSE2-NEXT: je .LBB23_3
-; X64-SSE2-NEXT: .LBB23_2: # %res_block
-; X64-SSE2-NEXT: movl $1, %eax
-; X64-SSE2-NEXT: .LBB23_3: # %endblock
-; X64-SSE2-NEXT: testl %eax, %eax
; X64-SSE2-NEXT: setne %al
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: length24_eq_const:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
+; X64-AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+; X64-AVX-NEXT: movabsq $3689065127958034230, %rax # imm = 0x3332313039383736
+; X64-AVX-NEXT: vmovq %rax, %xmm2
+; X64-AVX-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
; X64-AVX-NEXT: vpcmpeqb {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vpmovmskb %xmm0, %eax
; X64-AVX-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X64-AVX-NEXT: jne .LBB23_2
-; X64-AVX-NEXT: # %bb.1: # %loadbb1
-; X64-AVX-NEXT: xorl %eax, %eax
-; X64-AVX-NEXT: movabsq $3689065127958034230, %rcx # imm = 0x3332313039383736
-; X64-AVX-NEXT: cmpq %rcx, 16(%rdi)
-; X64-AVX-NEXT: je .LBB23_3
-; X64-AVX-NEXT: .LBB23_2: # %res_block
-; X64-AVX-NEXT: movl $1, %eax
-; X64-AVX-NEXT: .LBB23_3: # %endblock
-; X64-AVX-NEXT: testl %eax, %eax
; X64-AVX-NEXT: setne %al
; X64-AVX-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 24) nounwind
@@ -898,67 +833,40 @@ define i1 @length32_eq(i8* %x, i8* %y) n
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT: movdqu (%ecx), %xmm0
-; X86-SSE2-NEXT: movdqu (%eax), %xmm1
-; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
-; X86-SSE2-NEXT: pmovmskb %xmm1, %edx
-; X86-SSE2-NEXT: cmpl $65535, %edx # imm = 0xFFFF
-; X86-SSE2-NEXT: jne .LBB25_2
-; X86-SSE2-NEXT: # %bb.1: # %loadbb1
-; X86-SSE2-NEXT: movdqu 16(%ecx), %xmm0
-; X86-SSE2-NEXT: movdqu 16(%eax), %xmm1
-; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
-; X86-SSE2-NEXT: pmovmskb %xmm1, %ecx
-; X86-SSE2-NEXT: xorl %eax, %eax
-; X86-SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
-; X86-SSE2-NEXT: je .LBB25_3
-; X86-SSE2-NEXT: .LBB25_2: # %res_block
-; X86-SSE2-NEXT: movl $1, %eax
-; X86-SSE2-NEXT: .LBB25_3: # %endblock
-; X86-SSE2-NEXT: testl %eax, %eax
+; X86-SSE2-NEXT: movdqu 16(%ecx), %xmm1
+; X86-SSE2-NEXT: movdqu (%eax), %xmm2
+; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
+; X86-SSE2-NEXT: movdqu 16(%eax), %xmm0
+; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
+; X86-SSE2-NEXT: pand %xmm2, %xmm0
+; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
+; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X86-SSE2-NEXT: sete %al
; X86-SSE2-NEXT: retl
;
; X64-SSE2-LABEL: length32_eq:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE2-NEXT: movdqu (%rsi), %xmm1
-; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
-; X64-SSE2-NEXT: pmovmskb %xmm1, %eax
+; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1
+; X64-SSE2-NEXT: movdqu (%rsi), %xmm2
+; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
+; X64-SSE2-NEXT: movdqu 16(%rsi), %xmm0
+; X64-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
+; X64-SSE2-NEXT: pand %xmm2, %xmm0
+; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X64-SSE2-NEXT: jne .LBB25_2
-; X64-SSE2-NEXT: # %bb.1: # %loadbb1
-; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm0
-; X64-SSE2-NEXT: movdqu 16(%rsi), %xmm1
-; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
-; X64-SSE2-NEXT: pmovmskb %xmm1, %ecx
-; X64-SSE2-NEXT: xorl %eax, %eax
-; X64-SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
-; X64-SSE2-NEXT: je .LBB25_3
-; X64-SSE2-NEXT: .LBB25_2: # %res_block
-; X64-SSE2-NEXT: movl $1, %eax
-; X64-SSE2-NEXT: .LBB25_3: # %endblock
-; X64-SSE2-NEXT: testl %eax, %eax
; X64-SSE2-NEXT: sete %al
; X64-SSE2-NEXT: retq
;
; X64-AVX1-LABEL: length32_eq:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovdqu (%rdi), %xmm0
+; X64-AVX1-NEXT: vmovdqu 16(%rdi), %xmm1
+; X64-AVX1-NEXT: vpcmpeqb 16(%rsi), %xmm1, %xmm1
; X64-AVX1-NEXT: vpcmpeqb (%rsi), %xmm0, %xmm0
+; X64-AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpmovmskb %xmm0, %eax
; X64-AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X64-AVX1-NEXT: jne .LBB25_2
-; X64-AVX1-NEXT: # %bb.1: # %loadbb1
-; X64-AVX1-NEXT: vmovdqu 16(%rdi), %xmm0
-; X64-AVX1-NEXT: vpcmpeqb 16(%rsi), %xmm0, %xmm0
-; X64-AVX1-NEXT: vpmovmskb %xmm0, %ecx
-; X64-AVX1-NEXT: xorl %eax, %eax
-; X64-AVX1-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
-; X64-AVX1-NEXT: je .LBB25_3
-; X64-AVX1-NEXT: .LBB25_2: # %res_block
-; X64-AVX1-NEXT: movl $1, %eax
-; X64-AVX1-NEXT: .LBB25_3: # %endblock
-; X64-AVX1-NEXT: testl %eax, %eax
; X64-AVX1-NEXT: sete %al
; X64-AVX1-NEXT: retq
;
@@ -1005,63 +913,36 @@ define i1 @length32_eq_const(i8* %X) nou
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: movdqu (%eax), %xmm0
+; X86-SSE2-NEXT: movdqu 16(%eax), %xmm1
+; X86-SSE2-NEXT: pcmpeqb {{\.LCPI.*}}, %xmm1
; X86-SSE2-NEXT: pcmpeqb {{\.LCPI.*}}, %xmm0
-; X86-SSE2-NEXT: pmovmskb %xmm0, %ecx
-; X86-SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
-; X86-SSE2-NEXT: jne .LBB26_2
-; X86-SSE2-NEXT: # %bb.1: # %loadbb1
-; X86-SSE2-NEXT: movdqu 16(%eax), %xmm0
-; X86-SSE2-NEXT: pcmpeqb {{\.LCPI.*}}, %xmm0
-; X86-SSE2-NEXT: pmovmskb %xmm0, %ecx
-; X86-SSE2-NEXT: xorl %eax, %eax
-; X86-SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
-; X86-SSE2-NEXT: je .LBB26_3
-; X86-SSE2-NEXT: .LBB26_2: # %res_block
-; X86-SSE2-NEXT: movl $1, %eax
-; X86-SSE2-NEXT: .LBB26_3: # %endblock
-; X86-SSE2-NEXT: testl %eax, %eax
+; X86-SSE2-NEXT: pand %xmm1, %xmm0
+; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
+; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X86-SSE2-NEXT: setne %al
; X86-SSE2-NEXT: retl
;
; X64-SSE2-LABEL: length32_eq_const:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
+; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1
+; X64-SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm1
; X64-SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm0
+; X64-SSE2-NEXT: pand %xmm1, %xmm0
; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X64-SSE2-NEXT: jne .LBB26_2
-; X64-SSE2-NEXT: # %bb.1: # %loadbb1
-; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm0
-; X64-SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm0
-; X64-SSE2-NEXT: pmovmskb %xmm0, %ecx
-; X64-SSE2-NEXT: xorl %eax, %eax
-; X64-SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
-; X64-SSE2-NEXT: je .LBB26_3
-; X64-SSE2-NEXT: .LBB26_2: # %res_block
-; X64-SSE2-NEXT: movl $1, %eax
-; X64-SSE2-NEXT: .LBB26_3: # %endblock
-; X64-SSE2-NEXT: testl %eax, %eax
; X64-SSE2-NEXT: setne %al
; X64-SSE2-NEXT: retq
;
; X64-AVX1-LABEL: length32_eq_const:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovdqu (%rdi), %xmm0
+; X64-AVX1-NEXT: vmovdqu 16(%rdi), %xmm1
+; X64-AVX1-NEXT: vpcmpeqb {{.*}}(%rip), %xmm1, %xmm1
; X64-AVX1-NEXT: vpcmpeqb {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpmovmskb %xmm0, %eax
; X64-AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X64-AVX1-NEXT: jne .LBB26_2
-; X64-AVX1-NEXT: # %bb.1: # %loadbb1
-; X64-AVX1-NEXT: vmovdqu 16(%rdi), %xmm0
-; X64-AVX1-NEXT: vpcmpeqb {{.*}}(%rip), %xmm0, %xmm0
-; X64-AVX1-NEXT: vpmovmskb %xmm0, %ecx
-; X64-AVX1-NEXT: xorl %eax, %eax
-; X64-AVX1-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
-; X64-AVX1-NEXT: je .LBB26_3
-; X64-AVX1-NEXT: .LBB26_2: # %res_block
-; X64-AVX1-NEXT: movl $1, %eax
-; X64-AVX1-NEXT: .LBB26_3: # %endblock
-; X64-AVX1-NEXT: testl %eax, %eax
; X64-AVX1-NEXT: setne %al
; X64-AVX1-NEXT: retq
;
@@ -1134,21 +1015,12 @@ define i1 @length64_eq(i8* %x, i8* %y) n
; X64-AVX2-LABEL: length64_eq:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
+; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1
+; X64-AVX2-NEXT: vpcmpeqb 32(%rsi), %ymm1, %ymm1
; X64-AVX2-NEXT: vpcmpeqb (%rsi), %ymm0, %ymm0
+; X64-AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: vpmovmskb %ymm0, %eax
; X64-AVX2-NEXT: cmpl $-1, %eax
-; X64-AVX2-NEXT: jne .LBB28_2
-; X64-AVX2-NEXT: # %bb.1: # %loadbb1
-; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm0
-; X64-AVX2-NEXT: vpcmpeqb 32(%rsi), %ymm0, %ymm0
-; X64-AVX2-NEXT: vpmovmskb %ymm0, %ecx
-; X64-AVX2-NEXT: xorl %eax, %eax
-; X64-AVX2-NEXT: cmpl $-1, %ecx
-; X64-AVX2-NEXT: je .LBB28_3
-; X64-AVX2-NEXT: .LBB28_2: # %res_block
-; X64-AVX2-NEXT: movl $1, %eax
-; X64-AVX2-NEXT: .LBB28_3: # %endblock
-; X64-AVX2-NEXT: testl %eax, %eax
; X64-AVX2-NEXT: setne %al
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
@@ -1195,21 +1067,12 @@ define i1 @length64_eq_const(i8* %X) nou
; X64-AVX2-LABEL: length64_eq_const:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
+; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1
+; X64-AVX2-NEXT: vpcmpeqb {{.*}}(%rip), %ymm1, %ymm1
; X64-AVX2-NEXT: vpcmpeqb {{.*}}(%rip), %ymm0, %ymm0
+; X64-AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: vpmovmskb %ymm0, %eax
; X64-AVX2-NEXT: cmpl $-1, %eax
-; X64-AVX2-NEXT: jne .LBB29_2
-; X64-AVX2-NEXT: # %bb.1: # %loadbb1
-; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm0
-; X64-AVX2-NEXT: vpcmpeqb {{.*}}(%rip), %ymm0, %ymm0
-; X64-AVX2-NEXT: vpmovmskb %ymm0, %ecx
-; X64-AVX2-NEXT: xorl %eax, %eax
-; X64-AVX2-NEXT: cmpl $-1, %ecx
-; X64-AVX2-NEXT: je .LBB29_3
-; X64-AVX2-NEXT: .LBB29_2: # %res_block
-; X64-AVX2-NEXT: movl $1, %eax
-; X64-AVX2-NEXT: .LBB29_3: # %endblock
-; X64-AVX2-NEXT: testl %eax, %eax
; X64-AVX2-NEXT: sete %al
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
Modified: llvm/trunk/test/Transforms/ExpandMemCmp/X86/memcmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ExpandMemCmp/X86/memcmp.ll?rev=321934&r1=321933&r2=321934&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/ExpandMemCmp/X86/memcmp.ll (original)
+++ llvm/trunk/test/Transforms/ExpandMemCmp/X86/memcmp.ll Sat Jan 6 08:16:04 2018
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -expandmemcmp -mtriple=i686-unknown-unknown -data-layout=e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128 < %s | FileCheck %s --check-prefix=ALL --check-prefix=X32
-; RUN: opt -S -expandmemcmp -mtriple=x86_64-unknown-unknown -data-layout=e-m:o-i64:64-f80:128-n8:16:32:64-S128 < %s | FileCheck %s --check-prefix=ALL --check-prefix=X64
+; RUN: opt -S -expandmemcmp -memcmp-num-loads-per-block=1 -mtriple=x86_64-unknown-unknown -data-layout=e-m:o-i64:64-f80:128-n8:16:32:64-S128 < %s | FileCheck %s --check-prefix=ALL --check-prefix=X64 --check-prefix=X64_1LD
+; RUN: opt -S -expandmemcmp -memcmp-num-loads-per-block=2 -mtriple=x86_64-unknown-unknown -data-layout=e-m:o-i64:64-f80:128-n8:16:32:64-S128 < %s | FileCheck %s --check-prefix=ALL --check-prefix=X64 --check-prefix=X64_2LD
declare i32 @memcmp(i8* nocapture, i8* nocapture, i64)
@@ -430,29 +431,69 @@ define i32 @cmp_eq2(i8* nocapture readon
}
define i32 @cmp_eq3(i8* nocapture readonly %x, i8* nocapture readonly %y) {
-; ALL-LABEL: @cmp_eq3(
-; ALL-NEXT: br label [[LOADBB:%.*]]
-; ALL: res_block:
-; ALL-NEXT: br label [[ENDBLOCK:%.*]]
-; ALL: loadbb:
-; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i16*
-; ALL-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i16*
-; ALL-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]]
-; ALL-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]]
-; ALL-NEXT: [[TMP5:%.*]] = icmp ne i16 [[TMP3]], [[TMP4]]
-; ALL-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
-; ALL: loadbb1:
-; ALL-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 2
-; ALL-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i8 2
-; ALL-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
-; ALL-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
-; ALL-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP8]], [[TMP9]]
-; ALL-NEXT: br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]]
-; ALL: endblock:
-; ALL-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
-; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
-; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
-; ALL-NEXT: ret i32 [[CONV]]
+; X32-LABEL: @cmp_eq3(
+; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i16*
+; X32-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i16*
+; X32-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]]
+; X32-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]]
+; X32-NEXT: [[TMP5:%.*]] = xor i16 [[TMP3]], [[TMP4]]
+; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 2
+; X32-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i8 2
+; X32-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
+; X32-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
+; X32-NEXT: [[TMP10:%.*]] = zext i8 [[TMP8]] to i16
+; X32-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i16
+; X32-NEXT: [[TMP12:%.*]] = xor i16 [[TMP10]], [[TMP11]]
+; X32-NEXT: [[TMP13:%.*]] = or i16 [[TMP5]], [[TMP12]]
+; X32-NEXT: [[TMP14:%.*]] = icmp ne i16 [[TMP13]], 0
+; X32-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
+; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
+; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; X32-NEXT: ret i32 [[CONV]]
+;
+; X64_1LD-LABEL: @cmp_eq3(
+; X64_1LD-NEXT: br label [[LOADBB:%.*]]
+; X64_1LD: res_block:
+; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
+; X64_1LD: loadbb:
+; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i16*
+; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i16*
+; X64_1LD-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]]
+; X64_1LD-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]]
+; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i16 [[TMP3]], [[TMP4]]
+; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; X64_1LD: loadbb1:
+; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 2
+; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i8 2
+; X64_1LD-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
+; X64_1LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
+; X64_1LD-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP8]], [[TMP9]]
+; X64_1LD-NEXT: br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; X64_1LD: endblock:
+; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
+; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
+; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; X64_1LD-NEXT: ret i32 [[CONV]]
+;
+; X64_2LD-LABEL: @cmp_eq3(
+; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i16*
+; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i16*
+; X64_2LD-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]]
+; X64_2LD-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]]
+; X64_2LD-NEXT: [[TMP5:%.*]] = xor i16 [[TMP3]], [[TMP4]]
+; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 2
+; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i8 2
+; X64_2LD-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
+; X64_2LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
+; X64_2LD-NEXT: [[TMP10:%.*]] = zext i8 [[TMP8]] to i16
+; X64_2LD-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i16
+; X64_2LD-NEXT: [[TMP12:%.*]] = xor i16 [[TMP10]], [[TMP11]]
+; X64_2LD-NEXT: [[TMP13:%.*]] = or i16 [[TMP5]], [[TMP12]]
+; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i16 [[TMP13]], 0
+; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
+; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
+; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; X64_2LD-NEXT: ret i32 [[CONV]]
;
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 3)
%cmp = icmp eq i32 %call, 0
@@ -479,29 +520,69 @@ define i32 @cmp_eq4(i8* nocapture readon
}
define i32 @cmp_eq5(i8* nocapture readonly %x, i8* nocapture readonly %y) {
-; ALL-LABEL: @cmp_eq5(
-; ALL-NEXT: br label [[LOADBB:%.*]]
-; ALL: res_block:
-; ALL-NEXT: br label [[ENDBLOCK:%.*]]
-; ALL: loadbb:
-; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
-; ALL-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; ALL-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; ALL-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
-; ALL-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
-; ALL-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
-; ALL: loadbb1:
-; ALL-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 4
-; ALL-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i8 4
-; ALL-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
-; ALL-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
-; ALL-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP8]], [[TMP9]]
-; ALL-NEXT: br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]]
-; ALL: endblock:
-; ALL-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
-; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
-; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
-; ALL-NEXT: ret i32 [[CONV]]
+; X32-LABEL: @cmp_eq5(
+; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
+; X32-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
+; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
+; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
+; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
+; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 4
+; X32-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i8 4
+; X32-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
+; X32-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
+; X32-NEXT: [[TMP10:%.*]] = zext i8 [[TMP8]] to i32
+; X32-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i32
+; X32-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]
+; X32-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]
+; X32-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
+; X32-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
+; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
+; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; X32-NEXT: ret i32 [[CONV]]
+;
+; X64_1LD-LABEL: @cmp_eq5(
+; X64_1LD-NEXT: br label [[LOADBB:%.*]]
+; X64_1LD: res_block:
+; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
+; X64_1LD: loadbb:
+; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
+; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
+; X64_1LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
+; X64_1LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
+; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
+; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; X64_1LD: loadbb1:
+; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 4
+; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i8 4
+; X64_1LD-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
+; X64_1LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
+; X64_1LD-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP8]], [[TMP9]]
+; X64_1LD-NEXT: br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; X64_1LD: endblock:
+; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
+; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
+; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; X64_1LD-NEXT: ret i32 [[CONV]]
+;
+; X64_2LD-LABEL: @cmp_eq5(
+; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
+; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
+; X64_2LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
+; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
+; X64_2LD-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
+; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 4
+; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i8 4
+; X64_2LD-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
+; X64_2LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
+; X64_2LD-NEXT: [[TMP10:%.*]] = zext i8 [[TMP8]] to i32
+; X64_2LD-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i32
+; X64_2LD-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]
+; X64_2LD-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]
+; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
+; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
+; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
+; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; X64_2LD-NEXT: ret i32 [[CONV]]
;
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 5)
%cmp = icmp eq i32 %call, 0
@@ -510,31 +591,75 @@ define i32 @cmp_eq5(i8* nocapture readon
}
define i32 @cmp_eq6(i8* nocapture readonly %x, i8* nocapture readonly %y) {
-; ALL-LABEL: @cmp_eq6(
-; ALL-NEXT: br label [[LOADBB:%.*]]
-; ALL: res_block:
-; ALL-NEXT: br label [[ENDBLOCK:%.*]]
-; ALL: loadbb:
-; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
-; ALL-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; ALL-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; ALL-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
-; ALL-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
-; ALL-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
-; ALL: loadbb1:
-; ALL-NEXT: [[TMP6:%.*]] = bitcast i8* [[X]] to i16*
-; ALL-NEXT: [[TMP7:%.*]] = bitcast i8* [[Y]] to i16*
-; ALL-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP6]], i16 2
-; ALL-NEXT: [[TMP9:%.*]] = getelementptr i16, i16* [[TMP7]], i16 2
-; ALL-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]]
-; ALL-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
-; ALL-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP10]], [[TMP11]]
-; ALL-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
-; ALL: endblock:
-; ALL-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
-; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
-; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
-; ALL-NEXT: ret i32 [[CONV]]
+; X32-LABEL: @cmp_eq6(
+; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
+; X32-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
+; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
+; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
+; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
+; X32-NEXT: [[TMP6:%.*]] = bitcast i8* [[X]] to i16*
+; X32-NEXT: [[TMP7:%.*]] = bitcast i8* [[Y]] to i16*
+; X32-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP6]], i16 2
+; X32-NEXT: [[TMP9:%.*]] = getelementptr i16, i16* [[TMP7]], i16 2
+; X32-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]]
+; X32-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
+; X32-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i32
+; X32-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i32
+; X32-NEXT: [[TMP14:%.*]] = xor i32 [[TMP12]], [[TMP13]]
+; X32-NEXT: [[TMP15:%.*]] = or i32 [[TMP5]], [[TMP14]]
+; X32-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
+; X32-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
+; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
+; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; X32-NEXT: ret i32 [[CONV]]
+;
+; X64_1LD-LABEL: @cmp_eq6(
+; X64_1LD-NEXT: br label [[LOADBB:%.*]]
+; X64_1LD: res_block:
+; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
+; X64_1LD: loadbb:
+; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
+; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
+; X64_1LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
+; X64_1LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
+; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
+; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; X64_1LD: loadbb1:
+; X64_1LD-NEXT: [[TMP6:%.*]] = bitcast i8* [[X]] to i16*
+; X64_1LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[Y]] to i16*
+; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP6]], i16 2
+; X64_1LD-NEXT: [[TMP9:%.*]] = getelementptr i16, i16* [[TMP7]], i16 2
+; X64_1LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]]
+; X64_1LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
+; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP10]], [[TMP11]]
+; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; X64_1LD: endblock:
+; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
+; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
+; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; X64_1LD-NEXT: ret i32 [[CONV]]
+;
+; X64_2LD-LABEL: @cmp_eq6(
+; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
+; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
+; X64_2LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
+; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
+; X64_2LD-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
+; X64_2LD-NEXT: [[TMP6:%.*]] = bitcast i8* [[X]] to i16*
+; X64_2LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[Y]] to i16*
+; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP6]], i16 2
+; X64_2LD-NEXT: [[TMP9:%.*]] = getelementptr i16, i16* [[TMP7]], i16 2
+; X64_2LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]]
+; X64_2LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
+; X64_2LD-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i32
+; X64_2LD-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i32
+; X64_2LD-NEXT: [[TMP14:%.*]] = xor i32 [[TMP12]], [[TMP13]]
+; X64_2LD-NEXT: [[TMP15:%.*]] = or i32 [[TMP5]], [[TMP14]]
+; X64_2LD-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
+; X64_2LD-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
+; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
+; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; X64_2LD-NEXT: ret i32 [[CONV]]
;
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 6)
%cmp = icmp eq i32 %call, 0
@@ -557,28 +682,22 @@ define i32 @cmp_eq7(i8* nocapture readon
define i32 @cmp_eq8(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X32-LABEL: @cmp_eq8(
-; X32-NEXT: br label [[LOADBB:%.*]]
-; X32: res_block:
-; X32-NEXT: br label [[ENDBLOCK:%.*]]
-; X32: loadbb:
; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
; X32-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
-; X32-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
-; X32-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
-; X32: loadbb1:
+; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
; X32-NEXT: [[TMP6:%.*]] = bitcast i8* [[X]] to i32*
; X32-NEXT: [[TMP7:%.*]] = bitcast i8* [[Y]] to i32*
; X32-NEXT: [[TMP8:%.*]] = getelementptr i32, i32* [[TMP6]], i32 1
; X32-NEXT: [[TMP9:%.*]] = getelementptr i32, i32* [[TMP7]], i32 1
; X32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]]
; X32-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]]
-; X32-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP10]], [[TMP11]]
-; X32-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
-; X32: endblock:
-; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
-; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
+; X32-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]
+; X32-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]
+; X32-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
+; X32-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
+; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X32-NEXT: ret i32 [[CONV]]
;
@@ -606,29 +725,49 @@ define i32 @cmp_eq9(i8* nocapture readon
; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X32-NEXT: ret i32 [[CONV]]
;
-; X64-LABEL: @cmp_eq9(
-; X64-NEXT: br label [[LOADBB:%.*]]
-; X64: res_block:
-; X64-NEXT: br label [[ENDBLOCK:%.*]]
-; X64: loadbb:
-; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
-; X64-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
-; X64-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
-; X64-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
-; X64: loadbb1:
-; X64-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 8
-; X64-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i8 8
-; X64-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
-; X64-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
-; X64-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP8]], [[TMP9]]
-; X64-NEXT: br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]]
-; X64: endblock:
-; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
-; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
-; X64-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
-; X64-NEXT: ret i32 [[CONV]]
+; X64_1LD-LABEL: @cmp_eq9(
+; X64_1LD-NEXT: br label [[LOADBB:%.*]]
+; X64_1LD: res_block:
+; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
+; X64_1LD: loadbb:
+; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
+; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
+; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
+; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
+; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; X64_1LD: loadbb1:
+; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 8
+; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i8 8
+; X64_1LD-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
+; X64_1LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
+; X64_1LD-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP8]], [[TMP9]]
+; X64_1LD-NEXT: br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; X64_1LD: endblock:
+; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
+; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
+; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; X64_1LD-NEXT: ret i32 [[CONV]]
+;
+; X64_2LD-LABEL: @cmp_eq9(
+; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
+; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
+; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
+; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
+; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 8
+; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i8 8
+; X64_2LD-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
+; X64_2LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
+; X64_2LD-NEXT: [[TMP10:%.*]] = zext i8 [[TMP8]] to i64
+; X64_2LD-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i64
+; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]
+; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]
+; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
+; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
+; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
+; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; X64_2LD-NEXT: ret i32 [[CONV]]
;
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 9)
%cmp = icmp eq i32 %call, 0
@@ -643,31 +782,53 @@ define i32 @cmp_eq10(i8* nocapture reado
; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X32-NEXT: ret i32 [[CONV]]
;
-; X64-LABEL: @cmp_eq10(
-; X64-NEXT: br label [[LOADBB:%.*]]
-; X64: res_block:
-; X64-NEXT: br label [[ENDBLOCK:%.*]]
-; X64: loadbb:
-; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
-; X64-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
-; X64-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
-; X64-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
-; X64: loadbb1:
-; X64-NEXT: [[TMP6:%.*]] = bitcast i8* [[X]] to i16*
-; X64-NEXT: [[TMP7:%.*]] = bitcast i8* [[Y]] to i16*
-; X64-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP6]], i16 4
-; X64-NEXT: [[TMP9:%.*]] = getelementptr i16, i16* [[TMP7]], i16 4
-; X64-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]]
-; X64-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
-; X64-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP10]], [[TMP11]]
-; X64-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
-; X64: endblock:
-; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
-; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
-; X64-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
-; X64-NEXT: ret i32 [[CONV]]
+; X64_1LD-LABEL: @cmp_eq10(
+; X64_1LD-NEXT: br label [[LOADBB:%.*]]
+; X64_1LD: res_block:
+; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
+; X64_1LD: loadbb:
+; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
+; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
+; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
+; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
+; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; X64_1LD: loadbb1:
+; X64_1LD-NEXT: [[TMP6:%.*]] = bitcast i8* [[X]] to i16*
+; X64_1LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[Y]] to i16*
+; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP6]], i16 4
+; X64_1LD-NEXT: [[TMP9:%.*]] = getelementptr i16, i16* [[TMP7]], i16 4
+; X64_1LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]]
+; X64_1LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
+; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP10]], [[TMP11]]
+; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; X64_1LD: endblock:
+; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
+; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
+; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; X64_1LD-NEXT: ret i32 [[CONV]]
+;
+; X64_2LD-LABEL: @cmp_eq10(
+; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
+; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
+; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
+; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
+; X64_2LD-NEXT: [[TMP6:%.*]] = bitcast i8* [[X]] to i16*
+; X64_2LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[Y]] to i16*
+; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP6]], i16 4
+; X64_2LD-NEXT: [[TMP9:%.*]] = getelementptr i16, i16* [[TMP7]], i16 4
+; X64_2LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]]
+; X64_2LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
+; X64_2LD-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i64
+; X64_2LD-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i64
+; X64_2LD-NEXT: [[TMP14:%.*]] = xor i64 [[TMP12]], [[TMP13]]
+; X64_2LD-NEXT: [[TMP15:%.*]] = or i64 [[TMP5]], [[TMP14]]
+; X64_2LD-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP15]], 0
+; X64_2LD-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
+; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
+; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; X64_2LD-NEXT: ret i32 [[CONV]]
;
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 10)
%cmp = icmp eq i32 %call, 0
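Note how the trailing i16 pair is addressed: the pointers are bitcast to i16* and
indexed by element, so "getelementptr i16, i16* %p, i16 4" means byte offset
4 * sizeof(i16) = 8. A portable sketch of that tail load, under the same
assumptions as above (hypothetical helper name):

  #include <cstdint>
  #include <cstring>

  // Element-indexed gep in C++ terms: the i16 tail starts at byte 8.
  static uint64_t load_tail16(const uint8_t *p) {
    uint16_t t;
    std::memcpy(&t, p + 4 * sizeof(uint16_t), sizeof(t));  // offset 8
    return t;  // widens to 64 bits, like the IR's zext before the xor
  }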
@@ -695,31 +856,53 @@ define i32 @cmp_eq12(i8* nocapture reado
; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X32-NEXT: ret i32 [[CONV]]
;
-; X64-LABEL: @cmp_eq12(
-; X64-NEXT: br label [[LOADBB:%.*]]
-; X64: res_block:
-; X64-NEXT: br label [[ENDBLOCK:%.*]]
-; X64: loadbb:
-; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
-; X64-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
-; X64-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
-; X64-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
-; X64: loadbb1:
-; X64-NEXT: [[TMP6:%.*]] = bitcast i8* [[X]] to i32*
-; X64-NEXT: [[TMP7:%.*]] = bitcast i8* [[Y]] to i32*
-; X64-NEXT: [[TMP8:%.*]] = getelementptr i32, i32* [[TMP6]], i32 2
-; X64-NEXT: [[TMP9:%.*]] = getelementptr i32, i32* [[TMP7]], i32 2
-; X64-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]]
-; X64-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]]
-; X64-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP10]], [[TMP11]]
-; X64-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
-; X64: endblock:
-; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
-; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
-; X64-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
-; X64-NEXT: ret i32 [[CONV]]
+; X64_1LD-LABEL: @cmp_eq12(
+; X64_1LD-NEXT: br label [[LOADBB:%.*]]
+; X64_1LD: res_block:
+; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
+; X64_1LD: loadbb:
+; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
+; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
+; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
+; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
+; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; X64_1LD: loadbb1:
+; X64_1LD-NEXT: [[TMP6:%.*]] = bitcast i8* [[X]] to i32*
+; X64_1LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[Y]] to i32*
+; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i32, i32* [[TMP6]], i32 2
+; X64_1LD-NEXT: [[TMP9:%.*]] = getelementptr i32, i32* [[TMP7]], i32 2
+; X64_1LD-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]]
+; X64_1LD-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]]
+; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP10]], [[TMP11]]
+; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; X64_1LD: endblock:
+; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
+; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
+; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; X64_1LD-NEXT: ret i32 [[CONV]]
+;
+; X64_2LD-LABEL: @cmp_eq12(
+; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
+; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
+; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
+; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
+; X64_2LD-NEXT: [[TMP6:%.*]] = bitcast i8* [[X]] to i32*
+; X64_2LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[Y]] to i32*
+; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i32, i32* [[TMP6]], i32 2
+; X64_2LD-NEXT: [[TMP9:%.*]] = getelementptr i32, i32* [[TMP7]], i32 2
+; X64_2LD-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]]
+; X64_2LD-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]]
+; X64_2LD-NEXT: [[TMP12:%.*]] = zext i32 [[TMP10]] to i64
+; X64_2LD-NEXT: [[TMP13:%.*]] = zext i32 [[TMP11]] to i64
+; X64_2LD-NEXT: [[TMP14:%.*]] = xor i64 [[TMP12]], [[TMP13]]
+; X64_2LD-NEXT: [[TMP15:%.*]] = or i64 [[TMP5]], [[TMP14]]
+; X64_2LD-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP15]], 0
+; X64_2LD-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
+; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
+; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; X64_2LD-NEXT: ret i32 [[CONV]]
;
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 12)
%cmp = icmp eq i32 %call, 0
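The cmp_eq9, cmp_eq10, and cmp_eq12 expansions all share one shape; only the
trailing load width changes (i8, i16, i32), each zero-extended to i64 before the
xor. A generic sketch of that shared shape, again hypothetical and illustrative
only:

  #include <cstdint>
  #include <cstring>

  // Shared shape of the X64_2LD expansions: an 8-byte head pair plus a
  // narrower tail pair at offset 8, folded with xor/or into one zero test.
  template <typename TailT>
  static int cmp_eq_8_plus_tail(const uint8_t *x, const uint8_t *y) {
    uint64_t a, b;
    std::memcpy(&a, x, 8);
    std::memcpy(&b, y, 8);
    TailT ta, tb;
    std::memcpy(&ta, x + 8, sizeof(TailT));
    std::memcpy(&tb, y + 8, sizeof(TailT));
    uint64_t diff = (a ^ b) |
                    (static_cast<uint64_t>(ta) ^ static_cast<uint64_t>(tb));
    return diff == 0;  // 1 iff all 8 + sizeof(TailT) bytes compare equal
  }

  // cmp_eq_8_plus_tail<uint16_t> mirrors cmp_eq10's IR;
  // cmp_eq_8_plus_tail<uint32_t> mirrors cmp_eq12's.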