[llvm] 0d05093 - [X86] NFC: expand inline memcmp test coverage
David Zarzycki via llvm-commits
llvm-commits at lists.llvm.org
Sat Oct 26 11:21:44 PDT 2019
Author: David Zarzycki
Date: 2019-10-26T21:14:57+03:00
New Revision: 0d0509384f054cb4f13260786ee48163ac94d123
URL: https://github.com/llvm/llvm-project/commit/0d0509384f054cb4f13260786ee48163ac94d123
DIFF: https://github.com/llvm/llvm-project/commit/0d0509384f054cb4f13260786ee48163ac94d123.diff
LOG: [X86] NFC: expand inline memcmp test coverage
1) Adds SSE4.1 coverage.
2) Adds coverage both with and without prefer-256-bit.
3) Adds more power-of-two tests up to 512 bytes.
4) Adds power-of-two-minus-one tests to verify overlapping loads (see the sketch after this message).
5) Adds power-of-two-plus-one-half tests (48, 96, 192, and 384).
6) Adds greater-than/less-than tests from 16 to 512 bytes.
https://reviews.llvm.org/D69222
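
To illustrate item 4: for a length such as 31 that falls one short of a
power of two, the expansion covers the buffer with two loads whose ranges
overlap, rather than falling back to a scalar tail. A minimal C sketch of
the idea, assuming a hypothetical helper name eq31 (this is not LLVM's
code; the real lowering uses unaligned SSE/AVX loads, as the checks in
the diff below show):

  #include <stdint.h>
  #include <string.h>

  /* Hypothetical sketch, not LLVM's implementation: a 31-byte equality
   * compare done as two 16-byte loads at offsets 0 and 15. The ranges
   * overlap on byte 15, so all 31 bytes are covered without a scalar
   * tail loop. */
  static int eq31(const unsigned char *x, const unsigned char *y)
  {
      uint64_t a[2], b[2], c[2], d[2];
      memcpy(a, x, 16);      /* bytes [0, 16)                       */
      memcpy(b, y, 16);
      memcpy(c, x + 15, 16); /* bytes [15, 31), overlapping byte 15 */
      memcpy(d, y + 15, 16);
      return ((a[0] ^ b[0]) | (a[1] ^ b[1]) |
              (c[0] ^ d[0]) | (c[1] ^ d[1])) == 0;
  }
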
Added:
Modified:
llvm/test/CodeGen/X86/memcmp.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/X86/memcmp.ll b/llvm/test/CodeGen/X86/memcmp.ll
index 97116d991c10..1493879649ef 100644
--- a/llvm/test/CodeGen/X86/memcmp.ll
+++ b/llvm/test/CodeGen/X86/memcmp.ll
@@ -1,17 +1,21 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=cmov | FileCheck %s --check-prefixes=X86,X86-NOSSE
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse | FileCheck %s --check-prefixes=X86,SSE,X86-SSE1
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86,SSE,X86-SSE2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=X64,X64-SSE2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX1
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X64,X64-AVX512,X64-AVX512F
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw | FileCheck %s --check-prefixes=X64,X64-AVX512,X64-AVX512BW
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=cmov | FileCheck %s --check-prefixes=X86,X86-NOSSE
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse | FileCheck %s --check-prefixes=X86,X86-SSE,X86-SSE1
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86,X86-SSE,X86-SSE2
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=X86,X86-SSE,X86-SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=X64,X64-SSE,X64-SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse4.1 | FileCheck %s --check-prefixes=X64,X64-SSE,X64-SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f,+prefer-256-bit | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw,+prefer-256-bit | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f,-prefer-256-bit | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX512,X64-AVX512F
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw,-prefer-256-bit | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX512,X64-AVX512BW
; This tests codegen time inlining/optimization of memcmp
; rdar://6480398
-@.str = private constant [65 x i8] c"0123456789012345678901234567890123456789012345678901234567890123\00", align 1
+@.str = private constant [513 x i8] c"01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901\00", align 1
declare i32 @memcmp(i8*, i8*, i64)
@@ -189,7 +193,7 @@ define i1 @length2_eq_const(i8* %X) nounwind {
; X64-NEXT: cmpl $12849, %eax # imm = 0x3231
; X64-NEXT: setne %al
; X64-NEXT: retq
- %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 1), i64 2) nounwind
+ %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 1), i64 2) nounwind
%c = icmp ne i32 %m, 0
ret i1 %c
}
@@ -431,7 +435,7 @@ define i1 @length4_eq_const(i8* %X) nounwind {
; X64-NEXT: cmpl $875770417, (%rdi) # imm = 0x34333231
; X64-NEXT: sete %al
; X64-NEXT: retq
- %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 1), i64 4) nounwind
+ %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 1), i64 4) nounwind
%c = icmp eq i32 %m, 0
ret i1 %c
}
@@ -679,7 +683,7 @@ define i1 @length8_eq_const(i8* %X) nounwind {
; X64-NEXT: cmpq %rax, (%rdi)
; X64-NEXT: setne %al
; X64-NEXT: retq
- %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 8) nounwind
+ %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 8) nounwind
%c = icmp ne i32 %m, 0
ret i1 %c
}
@@ -990,6 +994,17 @@ define i1 @length16_eq(i8* %x, i8* %y) nounwind {
; X86-SSE2-NEXT: setne %al
; X86-SSE2-NEXT: retl
;
+; X86-SSE41-LABEL: length16_eq:
+; X86-SSE41: # %bb.0:
+; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE41-NEXT: movdqu (%ecx), %xmm0
+; X86-SSE41-NEXT: movdqu (%eax), %xmm1
+; X86-SSE41-NEXT: pxor %xmm0, %xmm1
+; X86-SSE41-NEXT: ptest %xmm1, %xmm1
+; X86-SSE41-NEXT: setne %al
+; X86-SSE41-NEXT: retl
+;
; X64-SSE2-LABEL: length16_eq:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
@@ -1000,6 +1015,15 @@ define i1 @length16_eq(i8* %x, i8* %y) nounwind {
; X64-SSE2-NEXT: setne %al
; X64-SSE2-NEXT: retq
;
+; X64-SSE41-LABEL: length16_eq:
+; X64-SSE41: # %bb.0:
+; X64-SSE41-NEXT: movdqu (%rdi), %xmm0
+; X64-SSE41-NEXT: movdqu (%rsi), %xmm1
+; X64-SSE41-NEXT: pxor %xmm0, %xmm1
+; X64-SSE41-NEXT: ptest %xmm1, %xmm1
+; X64-SSE41-NEXT: setne %al
+; X64-SSE41-NEXT: retq
+;
; X64-AVX-LABEL: length16_eq:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
@@ -1007,19 +1031,97 @@ define i1 @length16_eq(i8* %x, i8* %y) nounwind {
; X64-AVX-NEXT: vptest %xmm0, %xmm0
; X64-AVX-NEXT: setne %al
; X64-AVX-NEXT: retq
-;
-; X64-AVX512-LABEL: length16_eq:
-; X64-AVX512: # %bb.0:
-; X64-AVX512-NEXT: vmovdqu (%rdi), %xmm0
-; X64-AVX512-NEXT: vpxor (%rsi), %xmm0, %xmm0
-; X64-AVX512-NEXT: vptest %xmm0, %xmm0
-; X64-AVX512-NEXT: setne %al
-; X64-AVX512-NEXT: retq
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 16) nounwind
%cmp = icmp ne i32 %call, 0
ret i1 %cmp
}
+define i1 @length16_lt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length16_lt:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $16
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: shrl $31, %eax
+; X86-NEXT: # kill: def $al killed $al killed $eax
+; X86-NEXT: retl
+;
+; X64-LABEL: length16_lt:
+; X64: # %bb.0:
+; X64-NEXT: movq (%rdi), %rcx
+; X64-NEXT: movq (%rsi), %rdx
+; X64-NEXT: bswapq %rcx
+; X64-NEXT: bswapq %rdx
+; X64-NEXT: cmpq %rdx, %rcx
+; X64-NEXT: jne .LBB33_2
+; X64-NEXT: # %bb.1: # %loadbb1
+; X64-NEXT: movq 8(%rdi), %rcx
+; X64-NEXT: movq 8(%rsi), %rdx
+; X64-NEXT: bswapq %rcx
+; X64-NEXT: bswapq %rdx
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: cmpq %rdx, %rcx
+; X64-NEXT: je .LBB33_3
+; X64-NEXT: .LBB33_2: # %res_block
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: cmpq %rdx, %rcx
+; X64-NEXT: setae %al
+; X64-NEXT: leal -1(%rax,%rax), %eax
+; X64-NEXT: .LBB33_3: # %endblock
+; X64-NEXT: shrl $31, %eax
+; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: retq
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 16) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length16_gt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length16_gt:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $16
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: setg %al
+; X86-NEXT: retl
+;
+; X64-LABEL: length16_gt:
+; X64: # %bb.0:
+; X64-NEXT: movq (%rdi), %rax
+; X64-NEXT: movq (%rsi), %rcx
+; X64-NEXT: bswapq %rax
+; X64-NEXT: bswapq %rcx
+; X64-NEXT: cmpq %rcx, %rax
+; X64-NEXT: jne .LBB34_2
+; X64-NEXT: # %bb.1: # %loadbb1
+; X64-NEXT: movq 8(%rdi), %rax
+; X64-NEXT: movq 8(%rsi), %rcx
+; X64-NEXT: bswapq %rax
+; X64-NEXT: bswapq %rcx
+; X64-NEXT: xorl %edx, %edx
+; X64-NEXT: cmpq %rcx, %rax
+; X64-NEXT: je .LBB34_3
+; X64-NEXT: .LBB34_2: # %res_block
+; X64-NEXT: xorl %edx, %edx
+; X64-NEXT: cmpq %rcx, %rax
+; X64-NEXT: setae %dl
+; X64-NEXT: leal -1(%rdx,%rdx), %edx
+; X64-NEXT: .LBB34_3: # %endblock
+; X64-NEXT: testl %edx, %edx
+; X64-NEXT: setg %al
+; X64-NEXT: retq
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 16) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
define i1 @length16_eq_const(i8* %X) nounwind {
; X86-NOSSE-LABEL: length16_eq_const:
; X86-NOSSE: # %bb.0:
@@ -1055,6 +1157,15 @@ define i1 @length16_eq_const(i8* %X) nounwind {
; X86-SSE2-NEXT: sete %al
; X86-SSE2-NEXT: retl
;
+; X86-SSE41-LABEL: length16_eq_const:
+; X86-SSE41: # %bb.0:
+; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE41-NEXT: movdqu (%eax), %xmm0
+; X86-SSE41-NEXT: pxor {{\.LCPI.*}}, %xmm0
+; X86-SSE41-NEXT: ptest %xmm0, %xmm0
+; X86-SSE41-NEXT: sete %al
+; X86-SSE41-NEXT: retl
+;
; X64-SSE2-LABEL: length16_eq_const:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
@@ -1064,6 +1175,14 @@ define i1 @length16_eq_const(i8* %X) nounwind {
; X64-SSE2-NEXT: sete %al
; X64-SSE2-NEXT: retq
;
+; X64-SSE41-LABEL: length16_eq_const:
+; X64-SSE41: # %bb.0:
+; X64-SSE41-NEXT: movdqu (%rdi), %xmm0
+; X64-SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
+; X64-SSE41-NEXT: ptest %xmm0, %xmm0
+; X64-SSE41-NEXT: sete %al
+; X64-SSE41-NEXT: retq
+;
; X64-AVX-LABEL: length16_eq_const:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
@@ -1071,15 +1190,7 @@ define i1 @length16_eq_const(i8* %X) nounwind {
; X64-AVX-NEXT: vptest %xmm0, %xmm0
; X64-AVX-NEXT: sete %al
; X64-AVX-NEXT: retq
-;
-; X64-AVX512-LABEL: length16_eq_const:
-; X64-AVX512: # %bb.0:
-; X64-AVX512-NEXT: vmovdqu (%rdi), %xmm0
-; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
-; X64-AVX512-NEXT: vptest %xmm0, %xmm0
-; X64-AVX512-NEXT: sete %al
-; X64-AVX512-NEXT: retq
- %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 16) nounwind
+ %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 16) nounwind
%c = icmp eq i32 %m, 0
ret i1 %c
}
@@ -1146,6 +1257,21 @@ define i1 @length24_eq(i8* %x, i8* %y) nounwind {
; X86-SSE2-NEXT: sete %al
; X86-SSE2-NEXT: retl
;
+; X86-SSE41-LABEL: length24_eq:
+; X86-SSE41: # %bb.0:
+; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE41-NEXT: movdqu (%ecx), %xmm0
+; X86-SSE41-NEXT: movdqu 8(%ecx), %xmm1
+; X86-SSE41-NEXT: movdqu (%eax), %xmm2
+; X86-SSE41-NEXT: pxor %xmm0, %xmm2
+; X86-SSE41-NEXT: movdqu 8(%eax), %xmm0
+; X86-SSE41-NEXT: pxor %xmm1, %xmm0
+; X86-SSE41-NEXT: por %xmm2, %xmm0
+; X86-SSE41-NEXT: ptest %xmm0, %xmm0
+; X86-SSE41-NEXT: sete %al
+; X86-SSE41-NEXT: retl
+;
; X64-SSE2-LABEL: length24_eq:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
@@ -1160,6 +1286,19 @@ define i1 @length24_eq(i8* %x, i8* %y) nounwind {
; X64-SSE2-NEXT: sete %al
; X64-SSE2-NEXT: retq
;
+; X64-SSE41-LABEL: length24_eq:
+; X64-SSE41: # %bb.0:
+; X64-SSE41-NEXT: movdqu (%rdi), %xmm0
+; X64-SSE41-NEXT: movdqu (%rsi), %xmm1
+; X64-SSE41-NEXT: pxor %xmm0, %xmm1
+; X64-SSE41-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; X64-SSE41-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
+; X64-SSE41-NEXT: pxor %xmm0, %xmm2
+; X64-SSE41-NEXT: por %xmm1, %xmm2
+; X64-SSE41-NEXT: ptest %xmm2, %xmm2
+; X64-SSE41-NEXT: sete %al
+; X64-SSE41-NEXT: retq
+;
; X64-AVX-LABEL: length24_eq:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
@@ -1171,23 +1310,65 @@ define i1 @length24_eq(i8* %x, i8* %y) nounwind {
; X64-AVX-NEXT: vptest %xmm0, %xmm0
; X64-AVX-NEXT: sete %al
; X64-AVX-NEXT: retq
-;
-; X64-AVX512-LABEL: length24_eq:
-; X64-AVX512: # %bb.0:
-; X64-AVX512-NEXT: vmovdqu (%rdi), %xmm0
-; X64-AVX512-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
-; X64-AVX512-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
-; X64-AVX512-NEXT: vpxor %xmm2, %xmm1, %xmm1
-; X64-AVX512-NEXT: vpxor (%rsi), %xmm0, %xmm0
-; X64-AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; X64-AVX512-NEXT: vptest %xmm0, %xmm0
-; X64-AVX512-NEXT: sete %al
-; X64-AVX512-NEXT: retq
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 24) nounwind
%cmp = icmp eq i32 %call, 0
ret i1 %cmp
}
+define i1 @length24_lt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length24_lt:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $24
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: shrl $31, %eax
+; X86-NEXT: # kill: def $al killed $al killed $eax
+; X86-NEXT: retl
+;
+; X64-LABEL: length24_lt:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: movl $24, %edx
+; X64-NEXT: callq memcmp
+; X64-NEXT: shrl $31, %eax
+; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 24) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length24_gt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length24_gt:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $24
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: setg %al
+; X86-NEXT: retl
+;
+; X64-LABEL: length24_gt:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: movl $24, %edx
+; X64-NEXT: callq memcmp
+; X64-NEXT: testl %eax, %eax
+; X64-NEXT: setg %al
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 24) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
define i1 @length24_eq_const(i8* %X) nounwind {
; X86-NOSSE-LABEL: length24_eq_const:
; X86-NOSSE: # %bb.0:
@@ -1226,6 +1407,18 @@ define i1 @length24_eq_const(i8* %X) nounwind {
; X86-SSE2-NEXT: setne %al
; X86-SSE2-NEXT: retl
;
+; X86-SSE41-LABEL: length24_eq_const:
+; X86-SSE41: # %bb.0:
+; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE41-NEXT: movdqu (%eax), %xmm0
+; X86-SSE41-NEXT: movdqu 8(%eax), %xmm1
+; X86-SSE41-NEXT: pxor {{\.LCPI.*}}, %xmm1
+; X86-SSE41-NEXT: pxor {{\.LCPI.*}}, %xmm0
+; X86-SSE41-NEXT: por %xmm1, %xmm0
+; X86-SSE41-NEXT: ptest %xmm0, %xmm0
+; X86-SSE41-NEXT: setne %al
+; X86-SSE41-NEXT: retl
+;
; X64-SSE2-LABEL: length24_eq_const:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
@@ -1238,6 +1431,17 @@ define i1 @length24_eq_const(i8* %X) nounwind {
; X64-SSE2-NEXT: setne %al
; X64-SSE2-NEXT: retq
;
+; X64-SSE41-LABEL: length24_eq_const:
+; X64-SSE41: # %bb.0:
+; X64-SSE41-NEXT: movdqu (%rdi), %xmm0
+; X64-SSE41-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
+; X64-SSE41-NEXT: pxor {{.*}}(%rip), %xmm1
+; X64-SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
+; X64-SSE41-NEXT: por %xmm1, %xmm0
+; X64-SSE41-NEXT: ptest %xmm0, %xmm0
+; X64-SSE41-NEXT: setne %al
+; X64-SSE41-NEXT: retq
+;
; X64-AVX-LABEL: length24_eq_const:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
@@ -1248,48 +1452,35 @@ define i1 @length24_eq_const(i8* %X) nounwind {
; X64-AVX-NEXT: vptest %xmm0, %xmm0
; X64-AVX-NEXT: setne %al
; X64-AVX-NEXT: retq
-;
-; X64-AVX512-LABEL: length24_eq_const:
-; X64-AVX512: # %bb.0:
-; X64-AVX512-NEXT: vmovdqu (%rdi), %xmm0
-; X64-AVX512-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
-; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %xmm1, %xmm1
-; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
-; X64-AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; X64-AVX512-NEXT: vptest %xmm0, %xmm0
-; X64-AVX512-NEXT: setne %al
-; X64-AVX512-NEXT: retq
- %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 24) nounwind
+ %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 24) nounwind
%c = icmp ne i32 %m, 0
ret i1 %c
}
-define i32 @length32(i8* %X, i8* %Y) nounwind {
-; X86-LABEL: length32:
+define i32 @length31(i8* %X, i8* %Y) nounwind {
+; X86-LABEL: length31:
; X86: # %bb.0:
; X86-NEXT: pushl $0
-; X86-NEXT: pushl $32
+; X86-NEXT: pushl $31
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X86-NEXT: retl
;
-; X64-LABEL: length32:
+; X64-LABEL: length31:
; X64: # %bb.0:
-; X64-NEXT: movl $32, %edx
+; X64-NEXT: movl $31, %edx
; X64-NEXT: jmp memcmp # TAILCALL
- %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 32) nounwind
+ %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 31) nounwind
ret i32 %m
}
-; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325
-
-define i1 @length32_eq(i8* %x, i8* %y) nounwind {
-; X86-NOSSE-LABEL: length32_eq:
+define i1 @length31_eq(i8* %x, i8* %y) nounwind {
+; X86-NOSSE-LABEL: length31_eq:
; X86-NOSSE: # %bb.0:
; X86-NOSSE-NEXT: pushl $0
-; X86-NOSSE-NEXT: pushl $32
+; X86-NOSSE-NEXT: pushl $31
; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: calll memcmp
@@ -1298,10 +1489,10 @@ define i1 @length32_eq(i8* %x, i8* %y) nounwind {
; X86-NOSSE-NEXT: sete %al
; X86-NOSSE-NEXT: retl
;
-; X86-SSE1-LABEL: length32_eq:
+; X86-SSE1-LABEL: length31_eq:
; X86-SSE1: # %bb.0:
; X86-SSE1-NEXT: pushl $0
-; X86-SSE1-NEXT: pushl $32
+; X86-SSE1-NEXT: pushl $31
; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: calll memcmp
@@ -1310,15 +1501,15 @@ define i1 @length32_eq(i8* %x, i8* %y) nounwind {
; X86-SSE1-NEXT: sete %al
; X86-SSE1-NEXT: retl
;
-; X86-SSE2-LABEL: length32_eq:
+; X86-SSE2-LABEL: length31_eq:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT: movdqu (%ecx), %xmm0
-; X86-SSE2-NEXT: movdqu 16(%ecx), %xmm1
+; X86-SSE2-NEXT: movdqu 15(%ecx), %xmm1
; X86-SSE2-NEXT: movdqu (%eax), %xmm2
; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
-; X86-SSE2-NEXT: movdqu 16(%eax), %xmm0
+; X86-SSE2-NEXT: movdqu 15(%eax), %xmm0
; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
; X86-SSE2-NEXT: pand %xmm2, %xmm0
; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
@@ -1326,13 +1517,28 @@ define i1 @length32_eq(i8* %x, i8* %y) nounwind {
; X86-SSE2-NEXT: sete %al
; X86-SSE2-NEXT: retl
;
-; X64-SSE2-LABEL: length32_eq:
+; X86-SSE41-LABEL: length31_eq:
+; X86-SSE41: # %bb.0:
+; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE41-NEXT: movdqu (%ecx), %xmm0
+; X86-SSE41-NEXT: movdqu 15(%ecx), %xmm1
+; X86-SSE41-NEXT: movdqu (%eax), %xmm2
+; X86-SSE41-NEXT: pxor %xmm0, %xmm2
+; X86-SSE41-NEXT: movdqu 15(%eax), %xmm0
+; X86-SSE41-NEXT: pxor %xmm1, %xmm0
+; X86-SSE41-NEXT: por %xmm2, %xmm0
+; X86-SSE41-NEXT: ptest %xmm0, %xmm0
+; X86-SSE41-NEXT: sete %al
+; X86-SSE41-NEXT: retl
+;
+; X64-SSE2-LABEL: length31_eq:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1
+; X64-SSE2-NEXT: movdqu 15(%rdi), %xmm1
; X64-SSE2-NEXT: movdqu (%rsi), %xmm2
; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
-; X64-SSE2-NEXT: movdqu 16(%rsi), %xmm0
+; X64-SSE2-NEXT: movdqu 15(%rsi), %xmm0
; X64-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
; X64-SSE2-NEXT: pand %xmm2, %xmm0
; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
@@ -1340,44 +1546,93 @@ define i1 @length32_eq(i8* %x, i8* %y) nounwind {
; X64-SSE2-NEXT: sete %al
; X64-SSE2-NEXT: retq
;
-; X64-AVX1-LABEL: length32_eq:
-; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vmovdqu (%rdi), %xmm0
-; X64-AVX1-NEXT: vmovdqu 16(%rdi), %xmm1
-; X64-AVX1-NEXT: vpxor 16(%rsi), %xmm1, %xmm1
-; X64-AVX1-NEXT: vpxor (%rsi), %xmm0, %xmm0
-; X64-AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vptest %xmm0, %xmm0
-; X64-AVX1-NEXT: sete %al
-; X64-AVX1-NEXT: retq
-;
-; X64-AVX2-LABEL: length32_eq:
-; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
-; X64-AVX2-NEXT: vpxor (%rsi), %ymm0, %ymm0
-; X64-AVX2-NEXT: vptest %ymm0, %ymm0
-; X64-AVX2-NEXT: sete %al
-; X64-AVX2-NEXT: vzeroupper
-; X64-AVX2-NEXT: retq
+; X64-SSE41-LABEL: length31_eq:
+; X64-SSE41: # %bb.0:
+; X64-SSE41-NEXT: movdqu (%rdi), %xmm0
+; X64-SSE41-NEXT: movdqu 15(%rdi), %xmm1
+; X64-SSE41-NEXT: movdqu (%rsi), %xmm2
+; X64-SSE41-NEXT: pxor %xmm0, %xmm2
+; X64-SSE41-NEXT: movdqu 15(%rsi), %xmm0
+; X64-SSE41-NEXT: pxor %xmm1, %xmm0
+; X64-SSE41-NEXT: por %xmm2, %xmm0
+; X64-SSE41-NEXT: ptest %xmm0, %xmm0
+; X64-SSE41-NEXT: sete %al
+; X64-SSE41-NEXT: retq
;
-; X64-AVX512-LABEL: length32_eq:
-; X64-AVX512: # %bb.0:
-; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0
-; X64-AVX512-NEXT: vpxor (%rsi), %ymm0, %ymm0
-; X64-AVX512-NEXT: vptest %ymm0, %ymm0
-; X64-AVX512-NEXT: sete %al
-; X64-AVX512-NEXT: vzeroupper
-; X64-AVX512-NEXT: retq
- %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 32) nounwind
+; X64-AVX-LABEL: length31_eq:
+; X64-AVX: # %bb.0:
+; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
+; X64-AVX-NEXT: vmovdqu 15(%rdi), %xmm1
+; X64-AVX-NEXT: vpxor 15(%rsi), %xmm1, %xmm1
+; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0
+; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vptest %xmm0, %xmm0
+; X64-AVX-NEXT: sete %al
+; X64-AVX-NEXT: retq
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 31) nounwind
%cmp = icmp eq i32 %call, 0
ret i1 %cmp
}
-define i1 @length32_eq_prefer128(i8* %x, i8* %y) nounwind "prefer-vector-width"="128" {
-; X86-NOSSE-LABEL: length32_eq_prefer128:
+define i1 @length31_lt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length31_lt:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $31
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: shrl $31, %eax
+; X86-NEXT: # kill: def $al killed $al killed $eax
+; X86-NEXT: retl
+;
+; X64-LABEL: length31_lt:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: movl $31, %edx
+; X64-NEXT: callq memcmp
+; X64-NEXT: shrl $31, %eax
+; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 31) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length31_gt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length31_gt:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $31
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: setg %al
+; X86-NEXT: retl
+;
+; X64-LABEL: length31_gt:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: movl $31, %edx
+; X64-NEXT: callq memcmp
+; X64-NEXT: testl %eax, %eax
+; X64-NEXT: setg %al
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 31) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length31_eq_prefer128(i8* %x, i8* %y) nounwind "prefer-vector-width"="128" {
+; X86-NOSSE-LABEL: length31_eq_prefer128:
; X86-NOSSE: # %bb.0:
; X86-NOSSE-NEXT: pushl $0
-; X86-NOSSE-NEXT: pushl $32
+; X86-NOSSE-NEXT: pushl $31
; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: calll memcmp
@@ -1386,10 +1641,10 @@ define i1 @length32_eq_prefer128(i8* %x, i8* %y) nounwind "prefer-vector-width"=
; X86-NOSSE-NEXT: sete %al
; X86-NOSSE-NEXT: retl
;
-; X86-SSE1-LABEL: length32_eq_prefer128:
+; X86-SSE1-LABEL: length31_eq_prefer128:
; X86-SSE1: # %bb.0:
; X86-SSE1-NEXT: pushl $0
-; X86-SSE1-NEXT: pushl $32
+; X86-SSE1-NEXT: pushl $31
; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: calll memcmp
@@ -1398,15 +1653,15 @@ define i1 @length32_eq_prefer128(i8* %x, i8* %y) nounwind "prefer-vector-width"=
; X86-SSE1-NEXT: sete %al
; X86-SSE1-NEXT: retl
;
-; X86-SSE2-LABEL: length32_eq_prefer128:
+; X86-SSE2-LABEL: length31_eq_prefer128:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT: movdqu (%ecx), %xmm0
-; X86-SSE2-NEXT: movdqu 16(%ecx), %xmm1
+; X86-SSE2-NEXT: movdqu 15(%ecx), %xmm1
; X86-SSE2-NEXT: movdqu (%eax), %xmm2
; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
-; X86-SSE2-NEXT: movdqu 16(%eax), %xmm0
+; X86-SSE2-NEXT: movdqu 15(%eax), %xmm0
; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
; X86-SSE2-NEXT: pand %xmm2, %xmm0
; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
@@ -1414,13 +1669,28 @@ define i1 @length32_eq_prefer128(i8* %x, i8* %y) nounwind "prefer-vector-width"=
; X86-SSE2-NEXT: sete %al
; X86-SSE2-NEXT: retl
;
-; X64-SSE2-LABEL: length32_eq_prefer128:
+; X86-SSE41-LABEL: length31_eq_prefer128:
+; X86-SSE41: # %bb.0:
+; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE41-NEXT: movdqu (%ecx), %xmm0
+; X86-SSE41-NEXT: movdqu 15(%ecx), %xmm1
+; X86-SSE41-NEXT: movdqu (%eax), %xmm2
+; X86-SSE41-NEXT: pxor %xmm0, %xmm2
+; X86-SSE41-NEXT: movdqu 15(%eax), %xmm0
+; X86-SSE41-NEXT: pxor %xmm1, %xmm0
+; X86-SSE41-NEXT: por %xmm2, %xmm0
+; X86-SSE41-NEXT: ptest %xmm0, %xmm0
+; X86-SSE41-NEXT: sete %al
+; X86-SSE41-NEXT: retl
+;
+; X64-SSE2-LABEL: length31_eq_prefer128:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1
+; X64-SSE2-NEXT: movdqu 15(%rdi), %xmm1
; X64-SSE2-NEXT: movdqu (%rsi), %xmm2
; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
-; X64-SSE2-NEXT: movdqu 16(%rsi), %xmm0
+; X64-SSE2-NEXT: movdqu 15(%rsi), %xmm0
; X64-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
; X64-SSE2-NEXT: pand %xmm2, %xmm0
; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
@@ -1428,37 +1698,39 @@ define i1 @length32_eq_prefer128(i8* %x, i8* %y) nounwind "prefer-vector-width"=
; X64-SSE2-NEXT: sete %al
; X64-SSE2-NEXT: retq
;
-; X64-AVX-LABEL: length32_eq_prefer128:
+; X64-SSE41-LABEL: length31_eq_prefer128:
+; X64-SSE41: # %bb.0:
+; X64-SSE41-NEXT: movdqu (%rdi), %xmm0
+; X64-SSE41-NEXT: movdqu 15(%rdi), %xmm1
+; X64-SSE41-NEXT: movdqu (%rsi), %xmm2
+; X64-SSE41-NEXT: pxor %xmm0, %xmm2
+; X64-SSE41-NEXT: movdqu 15(%rsi), %xmm0
+; X64-SSE41-NEXT: pxor %xmm1, %xmm0
+; X64-SSE41-NEXT: por %xmm2, %xmm0
+; X64-SSE41-NEXT: ptest %xmm0, %xmm0
+; X64-SSE41-NEXT: sete %al
+; X64-SSE41-NEXT: retq
+;
+; X64-AVX-LABEL: length31_eq_prefer128:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
-; X64-AVX-NEXT: vmovdqu 16(%rdi), %xmm1
-; X64-AVX-NEXT: vpxor 16(%rsi), %xmm1, %xmm1
+; X64-AVX-NEXT: vmovdqu 15(%rdi), %xmm1
+; X64-AVX-NEXT: vpxor 15(%rsi), %xmm1, %xmm1
; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0
; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vptest %xmm0, %xmm0
; X64-AVX-NEXT: sete %al
; X64-AVX-NEXT: retq
-;
-; X64-AVX512-LABEL: length32_eq_prefer128:
-; X64-AVX512: # %bb.0:
-; X64-AVX512-NEXT: vmovdqu (%rdi), %xmm0
-; X64-AVX512-NEXT: vmovdqu 16(%rdi), %xmm1
-; X64-AVX512-NEXT: vpxor 16(%rsi), %xmm1, %xmm1
-; X64-AVX512-NEXT: vpxor (%rsi), %xmm0, %xmm0
-; X64-AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; X64-AVX512-NEXT: vptest %xmm0, %xmm0
-; X64-AVX512-NEXT: sete %al
-; X64-AVX512-NEXT: retq
- %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 32) nounwind
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 31) nounwind
%cmp = icmp eq i32 %call, 0
ret i1 %cmp
}
-define i1 @length32_eq_const(i8* %X) nounwind {
-; X86-NOSSE-LABEL: length32_eq_const:
+define i1 @length31_eq_const(i8* %X) nounwind {
+; X86-NOSSE-LABEL: length31_eq_const:
; X86-NOSSE: # %bb.0:
; X86-NOSSE-NEXT: pushl $0
-; X86-NOSSE-NEXT: pushl $32
+; X86-NOSSE-NEXT: pushl $31
; X86-NOSSE-NEXT: pushl $.L.str
; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: calll memcmp
@@ -1467,10 +1739,10 @@ define i1 @length32_eq_const(i8* %X) nounwind {
; X86-NOSSE-NEXT: setne %al
; X86-NOSSE-NEXT: retl
;
-; X86-SSE1-LABEL: length32_eq_const:
+; X86-SSE1-LABEL: length31_eq_const:
; X86-SSE1: # %bb.0:
; X86-SSE1-NEXT: pushl $0
-; X86-SSE1-NEXT: pushl $32
+; X86-SSE1-NEXT: pushl $31
; X86-SSE1-NEXT: pushl $.L.str
; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
; X86-SSE1-NEXT: calll memcmp
@@ -1479,11 +1751,11 @@ define i1 @length32_eq_const(i8* %X) nounwind {
; X86-SSE1-NEXT: setne %al
; X86-SSE1-NEXT: retl
;
-; X86-SSE2-LABEL: length32_eq_const:
+; X86-SSE2-LABEL: length31_eq_const:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: movdqu (%eax), %xmm0
-; X86-SSE2-NEXT: movdqu 16(%eax), %xmm1
+; X86-SSE2-NEXT: movdqu 15(%eax), %xmm1
; X86-SSE2-NEXT: pcmpeqb {{\.LCPI.*}}, %xmm1
; X86-SSE2-NEXT: pcmpeqb {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT: pand %xmm1, %xmm0
@@ -1492,10 +1764,22 @@ define i1 @length32_eq_const(i8* %X) nounwind {
; X86-SSE2-NEXT: setne %al
; X86-SSE2-NEXT: retl
;
-; X64-SSE2-LABEL: length32_eq_const:
+; X86-SSE41-LABEL: length31_eq_const:
+; X86-SSE41: # %bb.0:
+; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE41-NEXT: movdqu (%eax), %xmm0
+; X86-SSE41-NEXT: movdqu 15(%eax), %xmm1
+; X86-SSE41-NEXT: pxor {{\.LCPI.*}}, %xmm1
+; X86-SSE41-NEXT: pxor {{\.LCPI.*}}, %xmm0
+; X86-SSE41-NEXT: por %xmm1, %xmm0
+; X86-SSE41-NEXT: ptest %xmm0, %xmm0
+; X86-SSE41-NEXT: setne %al
+; X86-SSE41-NEXT: retl
+;
+; X64-SSE2-LABEL: length31_eq_const:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
-; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1
+; X64-SSE2-NEXT: movdqu 15(%rdi), %xmm1
; X64-SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm1
; X64-SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm0
; X64-SSE2-NEXT: pand %xmm1, %xmm0
@@ -1504,63 +1788,2829 @@ define i1 @length32_eq_const(i8* %X) nounwind {
; X64-SSE2-NEXT: setne %al
; X64-SSE2-NEXT: retq
;
-; X64-AVX1-LABEL: length32_eq_const:
+; X64-SSE41-LABEL: length31_eq_const:
+; X64-SSE41: # %bb.0:
+; X64-SSE41-NEXT: movdqu (%rdi), %xmm0
+; X64-SSE41-NEXT: movdqu 15(%rdi), %xmm1
+; X64-SSE41-NEXT: pxor {{.*}}(%rip), %xmm1
+; X64-SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
+; X64-SSE41-NEXT: por %xmm1, %xmm0
+; X64-SSE41-NEXT: ptest %xmm0, %xmm0
+; X64-SSE41-NEXT: setne %al
+; X64-SSE41-NEXT: retq
+;
+; X64-AVX-LABEL: length31_eq_const:
+; X64-AVX: # %bb.0:
+; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
+; X64-AVX-NEXT: vmovdqu 15(%rdi), %xmm1
+; X64-AVX-NEXT: vpxor {{.*}}(%rip), %xmm1, %xmm1
+; X64-AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vptest %xmm0, %xmm0
+; X64-AVX-NEXT: setne %al
+; X64-AVX-NEXT: retq
+ %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 31) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length32(i8* %X, i8* %Y) nounwind {
+; X86-LABEL: length32:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $32
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: retl
+;
+; X64-LABEL: length32:
+; X64: # %bb.0:
+; X64-NEXT: movl $32, %edx
+; X64-NEXT: jmp memcmp # TAILCALL
+ %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 32) nounwind
+ ret i32 %m
+}
+
+; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325
+
+define i1 @length32_eq(i8* %x, i8* %y) nounwind {
+; X86-NOSSE-LABEL: length32_eq:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl $0
+; X86-NOSSE-NEXT: pushl $32
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: calll memcmp
+; X86-NOSSE-NEXT: addl $16, %esp
+; X86-NOSSE-NEXT: testl %eax, %eax
+; X86-NOSSE-NEXT: sete %al
+; X86-NOSSE-NEXT: retl
+;
+; X86-SSE1-LABEL: length32_eq:
+; X86-SSE1: # %bb.0:
+; X86-SSE1-NEXT: pushl $0
+; X86-SSE1-NEXT: pushl $32
+; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: calll memcmp
+; X86-SSE1-NEXT: addl $16, %esp
+; X86-SSE1-NEXT: testl %eax, %eax
+; X86-SSE1-NEXT: sete %al
+; X86-SSE1-NEXT: retl
+;
+; X86-SSE2-LABEL: length32_eq:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE2-NEXT: movdqu (%ecx), %xmm0
+; X86-SSE2-NEXT: movdqu 16(%ecx), %xmm1
+; X86-SSE2-NEXT: movdqu (%eax), %xmm2
+; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
+; X86-SSE2-NEXT: movdqu 16(%eax), %xmm0
+; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
+; X86-SSE2-NEXT: pand %xmm2, %xmm0
+; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
+; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; X86-SSE2-NEXT: sete %al
+; X86-SSE2-NEXT: retl
+;
+; X86-SSE41-LABEL: length32_eq:
+; X86-SSE41: # %bb.0:
+; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE41-NEXT: movdqu (%ecx), %xmm0
+; X86-SSE41-NEXT: movdqu 16(%ecx), %xmm1
+; X86-SSE41-NEXT: movdqu (%eax), %xmm2
+; X86-SSE41-NEXT: pxor %xmm0, %xmm2
+; X86-SSE41-NEXT: movdqu 16(%eax), %xmm0
+; X86-SSE41-NEXT: pxor %xmm1, %xmm0
+; X86-SSE41-NEXT: por %xmm2, %xmm0
+; X86-SSE41-NEXT: ptest %xmm0, %xmm0
+; X86-SSE41-NEXT: sete %al
+; X86-SSE41-NEXT: retl
+;
+; X64-SSE2-LABEL: length32_eq:
+; X64-SSE2: # %bb.0:
+; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
+; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1
+; X64-SSE2-NEXT: movdqu (%rsi), %xmm2
+; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
+; X64-SSE2-NEXT: movdqu 16(%rsi), %xmm0
+; X64-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
+; X64-SSE2-NEXT: pand %xmm2, %xmm0
+; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
+; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; X64-SSE2-NEXT: sete %al
+; X64-SSE2-NEXT: retq
+;
+; X64-SSE41-LABEL: length32_eq:
+; X64-SSE41: # %bb.0:
+; X64-SSE41-NEXT: movdqu (%rdi), %xmm0
+; X64-SSE41-NEXT: movdqu 16(%rdi), %xmm1
+; X64-SSE41-NEXT: movdqu (%rsi), %xmm2
+; X64-SSE41-NEXT: pxor %xmm0, %xmm2
+; X64-SSE41-NEXT: movdqu 16(%rsi), %xmm0
+; X64-SSE41-NEXT: pxor %xmm1, %xmm0
+; X64-SSE41-NEXT: por %xmm2, %xmm0
+; X64-SSE41-NEXT: ptest %xmm0, %xmm0
+; X64-SSE41-NEXT: sete %al
+; X64-SSE41-NEXT: retq
+;
+; X64-AVX1-LABEL: length32_eq:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovdqu (%rdi), %xmm0
; X64-AVX1-NEXT: vmovdqu 16(%rdi), %xmm1
-; X64-AVX1-NEXT: vpxor {{.*}}(%rip), %xmm1, %xmm1
-; X64-AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: vpxor 16(%rsi), %xmm1, %xmm1
+; X64-AVX1-NEXT: vpxor (%rsi), %xmm0, %xmm0
; X64-AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vptest %xmm0, %xmm0
-; X64-AVX1-NEXT: setne %al
+; X64-AVX1-NEXT: sete %al
; X64-AVX1-NEXT: retq
;
-; X64-AVX2-LABEL: length32_eq_const:
+; X64-AVX2-LABEL: length32_eq:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
-; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %ymm0, %ymm0
+; X64-AVX2-NEXT: vpxor (%rsi), %ymm0, %ymm0
; X64-AVX2-NEXT: vptest %ymm0, %ymm0
-; X64-AVX2-NEXT: setne %al
+; X64-AVX2-NEXT: sete %al
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
;
-; X64-AVX512-LABEL: length32_eq_const:
-; X64-AVX512: # %bb.0:
-; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0
-; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %ymm0, %ymm0
-; X64-AVX512-NEXT: vptest %ymm0, %ymm0
-; X64-AVX512-NEXT: setne %al
-; X64-AVX512-NEXT: vzeroupper
-; X64-AVX512-NEXT: retq
- %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 32) nounwind
- %c = icmp ne i32 %m, 0
+; X64-AVX512-LABEL: length32_eq:
+; X64-AVX512: # %bb.0:
+; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0
+; X64-AVX512-NEXT: vpxor (%rsi), %ymm0, %ymm0
+; X64-AVX512-NEXT: vptest %ymm0, %ymm0
+; X64-AVX512-NEXT: sete %al
+; X64-AVX512-NEXT: vzeroupper
+; X64-AVX512-NEXT: retq
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 32) nounwind
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length32_lt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length32_lt:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $32
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: shrl $31, %eax
+; X86-NEXT: # kill: def $al killed $al killed $eax
+; X86-NEXT: retl
+;
+; X64-LABEL: length32_lt:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: movl $32, %edx
+; X64-NEXT: callq memcmp
+; X64-NEXT: shrl $31, %eax
+; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 32) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length32_gt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length32_gt:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $32
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: setg %al
+; X86-NEXT: retl
+;
+; X64-LABEL: length32_gt:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: movl $32, %edx
+; X64-NEXT: callq memcmp
+; X64-NEXT: testl %eax, %eax
+; X64-NEXT: setg %al
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 32) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length32_eq_prefer128(i8* %x, i8* %y) nounwind "prefer-vector-width"="128" {
+; X86-NOSSE-LABEL: length32_eq_prefer128:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl $0
+; X86-NOSSE-NEXT: pushl $32
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: calll memcmp
+; X86-NOSSE-NEXT: addl $16, %esp
+; X86-NOSSE-NEXT: testl %eax, %eax
+; X86-NOSSE-NEXT: sete %al
+; X86-NOSSE-NEXT: retl
+;
+; X86-SSE1-LABEL: length32_eq_prefer128:
+; X86-SSE1: # %bb.0:
+; X86-SSE1-NEXT: pushl $0
+; X86-SSE1-NEXT: pushl $32
+; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: calll memcmp
+; X86-SSE1-NEXT: addl $16, %esp
+; X86-SSE1-NEXT: testl %eax, %eax
+; X86-SSE1-NEXT: sete %al
+; X86-SSE1-NEXT: retl
+;
+; X86-SSE2-LABEL: length32_eq_prefer128:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE2-NEXT: movdqu (%ecx), %xmm0
+; X86-SSE2-NEXT: movdqu 16(%ecx), %xmm1
+; X86-SSE2-NEXT: movdqu (%eax), %xmm2
+; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
+; X86-SSE2-NEXT: movdqu 16(%eax), %xmm0
+; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
+; X86-SSE2-NEXT: pand %xmm2, %xmm0
+; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
+; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; X86-SSE2-NEXT: sete %al
+; X86-SSE2-NEXT: retl
+;
+; X86-SSE41-LABEL: length32_eq_prefer128:
+; X86-SSE41: # %bb.0:
+; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE41-NEXT: movdqu (%ecx), %xmm0
+; X86-SSE41-NEXT: movdqu 16(%ecx), %xmm1
+; X86-SSE41-NEXT: movdqu (%eax), %xmm2
+; X86-SSE41-NEXT: pxor %xmm0, %xmm2
+; X86-SSE41-NEXT: movdqu 16(%eax), %xmm0
+; X86-SSE41-NEXT: pxor %xmm1, %xmm0
+; X86-SSE41-NEXT: por %xmm2, %xmm0
+; X86-SSE41-NEXT: ptest %xmm0, %xmm0
+; X86-SSE41-NEXT: sete %al
+; X86-SSE41-NEXT: retl
+;
+; X64-SSE2-LABEL: length32_eq_prefer128:
+; X64-SSE2: # %bb.0:
+; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
+; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1
+; X64-SSE2-NEXT: movdqu (%rsi), %xmm2
+; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
+; X64-SSE2-NEXT: movdqu 16(%rsi), %xmm0
+; X64-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
+; X64-SSE2-NEXT: pand %xmm2, %xmm0
+; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
+; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; X64-SSE2-NEXT: sete %al
+; X64-SSE2-NEXT: retq
+;
+; X64-SSE41-LABEL: length32_eq_prefer128:
+; X64-SSE41: # %bb.0:
+; X64-SSE41-NEXT: movdqu (%rdi), %xmm0
+; X64-SSE41-NEXT: movdqu 16(%rdi), %xmm1
+; X64-SSE41-NEXT: movdqu (%rsi), %xmm2
+; X64-SSE41-NEXT: pxor %xmm0, %xmm2
+; X64-SSE41-NEXT: movdqu 16(%rsi), %xmm0
+; X64-SSE41-NEXT: pxor %xmm1, %xmm0
+; X64-SSE41-NEXT: por %xmm2, %xmm0
+; X64-SSE41-NEXT: ptest %xmm0, %xmm0
+; X64-SSE41-NEXT: sete %al
+; X64-SSE41-NEXT: retq
+;
+; X64-AVX-LABEL: length32_eq_prefer128:
+; X64-AVX: # %bb.0:
+; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
+; X64-AVX-NEXT: vmovdqu 16(%rdi), %xmm1
+; X64-AVX-NEXT: vpxor 16(%rsi), %xmm1, %xmm1
+; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0
+; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vptest %xmm0, %xmm0
+; X64-AVX-NEXT: sete %al
+; X64-AVX-NEXT: retq
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 32) nounwind
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length32_eq_const(i8* %X) nounwind {
+; X86-NOSSE-LABEL: length32_eq_const:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl $0
+; X86-NOSSE-NEXT: pushl $32
+; X86-NOSSE-NEXT: pushl $.L.str
+; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: calll memcmp
+; X86-NOSSE-NEXT: addl $16, %esp
+; X86-NOSSE-NEXT: testl %eax, %eax
+; X86-NOSSE-NEXT: setne %al
+; X86-NOSSE-NEXT: retl
+;
+; X86-SSE1-LABEL: length32_eq_const:
+; X86-SSE1: # %bb.0:
+; X86-SSE1-NEXT: pushl $0
+; X86-SSE1-NEXT: pushl $32
+; X86-SSE1-NEXT: pushl $.L.str
+; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: calll memcmp
+; X86-SSE1-NEXT: addl $16, %esp
+; X86-SSE1-NEXT: testl %eax, %eax
+; X86-SSE1-NEXT: setne %al
+; X86-SSE1-NEXT: retl
+;
+; X86-SSE2-LABEL: length32_eq_const:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: movdqu (%eax), %xmm0
+; X86-SSE2-NEXT: movdqu 16(%eax), %xmm1
+; X86-SSE2-NEXT: pcmpeqb {{\.LCPI.*}}, %xmm1
+; X86-SSE2-NEXT: pcmpeqb {{\.LCPI.*}}, %xmm0
+; X86-SSE2-NEXT: pand %xmm1, %xmm0
+; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
+; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; X86-SSE2-NEXT: setne %al
+; X86-SSE2-NEXT: retl
+;
+; X86-SSE41-LABEL: length32_eq_const:
+; X86-SSE41: # %bb.0:
+; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE41-NEXT: movdqu (%eax), %xmm0
+; X86-SSE41-NEXT: movdqu 16(%eax), %xmm1
+; X86-SSE41-NEXT: pxor {{\.LCPI.*}}, %xmm1
+; X86-SSE41-NEXT: pxor {{\.LCPI.*}}, %xmm0
+; X86-SSE41-NEXT: por %xmm1, %xmm0
+; X86-SSE41-NEXT: ptest %xmm0, %xmm0
+; X86-SSE41-NEXT: setne %al
+; X86-SSE41-NEXT: retl
+;
+; X64-SSE2-LABEL: length32_eq_const:
+; X64-SSE2: # %bb.0:
+; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
+; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1
+; X64-SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm1
+; X64-SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm0
+; X64-SSE2-NEXT: pand %xmm1, %xmm0
+; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
+; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; X64-SSE2-NEXT: setne %al
+; X64-SSE2-NEXT: retq
+;
+; X64-SSE41-LABEL: length32_eq_const:
+; X64-SSE41: # %bb.0:
+; X64-SSE41-NEXT: movdqu (%rdi), %xmm0
+; X64-SSE41-NEXT: movdqu 16(%rdi), %xmm1
+; X64-SSE41-NEXT: pxor {{.*}}(%rip), %xmm1
+; X64-SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
+; X64-SSE41-NEXT: por %xmm1, %xmm0
+; X64-SSE41-NEXT: ptest %xmm0, %xmm0
+; X64-SSE41-NEXT: setne %al
+; X64-SSE41-NEXT: retq
+;
+; X64-AVX1-LABEL: length32_eq_const:
+; X64-AVX1: # %bb.0:
+; X64-AVX1-NEXT: vmovdqu (%rdi), %xmm0
+; X64-AVX1-NEXT: vmovdqu 16(%rdi), %xmm1
+; X64-AVX1-NEXT: vpxor {{.*}}(%rip), %xmm1, %xmm1
+; X64-AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vptest %xmm0, %xmm0
+; X64-AVX1-NEXT: setne %al
+; X64-AVX1-NEXT: retq
+;
+; X64-AVX2-LABEL: length32_eq_const:
+; X64-AVX2: # %bb.0:
+; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
+; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %ymm0, %ymm0
+; X64-AVX2-NEXT: vptest %ymm0, %ymm0
+; X64-AVX2-NEXT: setne %al
+; X64-AVX2-NEXT: vzeroupper
+; X64-AVX2-NEXT: retq
+;
+; X64-AVX512-LABEL: length32_eq_const:
+; X64-AVX512: # %bb.0:
+; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0
+; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %ymm0, %ymm0
+; X64-AVX512-NEXT: vptest %ymm0, %ymm0
+; X64-AVX512-NEXT: setne %al
+; X64-AVX512-NEXT: vzeroupper
+; X64-AVX512-NEXT: retq
+ %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 32) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length48(i8* %X, i8* %Y) nounwind {
+; X86-LABEL: length48:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $48
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: retl
+;
+; X64-LABEL: length48:
+; X64: # %bb.0:
+; X64-NEXT: movl $48, %edx
+; X64-NEXT: jmp memcmp # TAILCALL
+ %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 48) nounwind
+ ret i32 %m
+}
+
+define i1 @length48_eq(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length48_eq:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $48
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: sete %al
+; X86-NEXT: retl
+;
+; X64-SSE-LABEL: length48_eq:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: pushq %rax
+; X64-SSE-NEXT: movl $48, %edx
+; X64-SSE-NEXT: callq memcmp
+; X64-SSE-NEXT: testl %eax, %eax
+; X64-SSE-NEXT: sete %al
+; X64-SSE-NEXT: popq %rcx
+; X64-SSE-NEXT: retq
+;
+; X64-AVX1-LABEL: length48_eq:
+; X64-AVX1: # %bb.0:
+; X64-AVX1-NEXT: pushq %rax
+; X64-AVX1-NEXT: movl $48, %edx
+; X64-AVX1-NEXT: callq memcmp
+; X64-AVX1-NEXT: testl %eax, %eax
+; X64-AVX1-NEXT: sete %al
+; X64-AVX1-NEXT: popq %rcx
+; X64-AVX1-NEXT: retq
+;
+; X64-AVX2-LABEL: length48_eq:
+; X64-AVX2: # %bb.0:
+; X64-AVX2-NEXT: movq 32(%rdi), %rcx
+; X64-AVX2-NEXT: movq %rcx, %rax
+; X64-AVX2-NEXT: movl %ecx, %edx
+; X64-AVX2-NEXT: shrl $8, %edx
+; X64-AVX2-NEXT: vmovd %ecx, %xmm0
+; X64-AVX2-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
+; X64-AVX2-NEXT: movl %ecx, %edx
+; X64-AVX2-NEXT: shrl $16, %edx
+; X64-AVX2-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; X64-AVX2-NEXT: movl %ecx, %edx
+; X64-AVX2-NEXT: shrl $24, %edx
+; X64-AVX2-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; X64-AVX2-NEXT: movq %rcx, %rdx
+; X64-AVX2-NEXT: shrq $32, %rdx
+; X64-AVX2-NEXT: vpinsrb $4, %edx, %xmm0, %xmm0
+; X64-AVX2-NEXT: movq %rcx, %rdx
+; X64-AVX2-NEXT: shrq $40, %rcx
+; X64-AVX2-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm1
+; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
+; X64-AVX2-NEXT: movq 40(%rdi), %rcx
+; X64-AVX2-NEXT: shrq $48, %rdx
+; X64-AVX2-NEXT: vpinsrb $6, %edx, %xmm1, %xmm1
+; X64-AVX2-NEXT: movq %rcx, %rdx
+; X64-AVX2-NEXT: shrq $56, %rdx
+; X64-AVX2-NEXT: shrq $56, %rax
+; X64-AVX2-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
+; X64-AVX2-NEXT: movl %ecx, %eax
+; X64-AVX2-NEXT: shrl $8, %eax
+; X64-AVX2-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
+; X64-AVX2-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
+; X64-AVX2-NEXT: movl %ecx, %eax
+; X64-AVX2-NEXT: shrl $16, %eax
+; X64-AVX2-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
+; X64-AVX2-NEXT: movl %ecx, %eax
+; X64-AVX2-NEXT: shrl $24, %eax
+; X64-AVX2-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
+; X64-AVX2-NEXT: movq %rcx, %rax
+; X64-AVX2-NEXT: shrq $32, %rax
+; X64-AVX2-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; X64-AVX2-NEXT: movq %rcx, %rax
+; X64-AVX2-NEXT: shrq $48, %rax
+; X64-AVX2-NEXT: shrq $40, %rcx
+; X64-AVX2-NEXT: vpinsrb $13, %ecx, %xmm1, %xmm1
+; X64-AVX2-NEXT: movq 32(%rsi), %rcx
+; X64-AVX2-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
+; X64-AVX2-NEXT: movq %rcx, %rax
+; X64-AVX2-NEXT: vpinsrb $15, %edx, %xmm1, %xmm1
+; X64-AVX2-NEXT: movl %ecx, %edx
+; X64-AVX2-NEXT: shrl $8, %edx
+; X64-AVX2-NEXT: vmovd %ecx, %xmm2
+; X64-AVX2-NEXT: vpinsrb $1, %edx, %xmm2, %xmm2
+; X64-AVX2-NEXT: movl %ecx, %edx
+; X64-AVX2-NEXT: shrl $16, %edx
+; X64-AVX2-NEXT: vpinsrb $2, %edx, %xmm2, %xmm2
+; X64-AVX2-NEXT: movl %ecx, %edx
+; X64-AVX2-NEXT: shrl $24, %edx
+; X64-AVX2-NEXT: vpinsrb $3, %edx, %xmm2, %xmm2
+; X64-AVX2-NEXT: movq %rcx, %rdx
+; X64-AVX2-NEXT: shrq $32, %rdx
+; X64-AVX2-NEXT: vpinsrb $4, %edx, %xmm2, %xmm2
+; X64-AVX2-NEXT: movq %rcx, %rdx
+; X64-AVX2-NEXT: shrq $40, %rcx
+; X64-AVX2-NEXT: vpinsrb $5, %ecx, %xmm2, %xmm2
+; X64-AVX2-NEXT: movq 40(%rsi), %rcx
+; X64-AVX2-NEXT: shrq $48, %rdx
+; X64-AVX2-NEXT: vpinsrb $6, %edx, %xmm2, %xmm2
+; X64-AVX2-NEXT: movq %rcx, %rdx
+; X64-AVX2-NEXT: shrq $56, %rax
+; X64-AVX2-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
+; X64-AVX2-NEXT: movl %ecx, %eax
+; X64-AVX2-NEXT: shrl $8, %eax
+; X64-AVX2-NEXT: vpinsrb $8, %ecx, %xmm2, %xmm2
+; X64-AVX2-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
+; X64-AVX2-NEXT: movl %ecx, %eax
+; X64-AVX2-NEXT: shrl $16, %eax
+; X64-AVX2-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
+; X64-AVX2-NEXT: movl %ecx, %eax
+; X64-AVX2-NEXT: shrl $24, %eax
+; X64-AVX2-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
+; X64-AVX2-NEXT: movq %rcx, %rax
+; X64-AVX2-NEXT: shrq $32, %rax
+; X64-AVX2-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
+; X64-AVX2-NEXT: movq %rcx, %rax
+; X64-AVX2-NEXT: shrq $40, %rcx
+; X64-AVX2-NEXT: vpinsrb $13, %ecx, %xmm2, %xmm2
+; X64-AVX2-NEXT: shrq $48, %rax
+; X64-AVX2-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
+; X64-AVX2-NEXT: shrq $56, %rdx
+; X64-AVX2-NEXT: vpinsrb $15, %edx, %xmm2, %xmm2
+; X64-AVX2-NEXT: vpxor (%rsi), %ymm0, %ymm0
+; X64-AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm1
+; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT: vptest %ymm0, %ymm0
+; X64-AVX2-NEXT: sete %al
+; X64-AVX2-NEXT: vzeroupper
+; X64-AVX2-NEXT: retq
+;
+; X64-AVX512-LABEL: length48_eq:
+; X64-AVX512: # %bb.0:
+; X64-AVX512-NEXT: movq 32(%rdi), %rcx
+; X64-AVX512-NEXT: movq %rcx, %rax
+; X64-AVX512-NEXT: movl %ecx, %edx
+; X64-AVX512-NEXT: shrl $8, %edx
+; X64-AVX512-NEXT: vmovd %ecx, %xmm0
+; X64-AVX512-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
+; X64-AVX512-NEXT: movl %ecx, %edx
+; X64-AVX512-NEXT: shrl $16, %edx
+; X64-AVX512-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; X64-AVX512-NEXT: movl %ecx, %edx
+; X64-AVX512-NEXT: shrl $24, %edx
+; X64-AVX512-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; X64-AVX512-NEXT: movq %rcx, %rdx
+; X64-AVX512-NEXT: shrq $32, %rdx
+; X64-AVX512-NEXT: vpinsrb $4, %edx, %xmm0, %xmm0
+; X64-AVX512-NEXT: movq %rcx, %rdx
+; X64-AVX512-NEXT: shrq $40, %rcx
+; X64-AVX512-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm1
+; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0
+; X64-AVX512-NEXT: movq 40(%rdi), %rcx
+; X64-AVX512-NEXT: shrq $48, %rdx
+; X64-AVX512-NEXT: vpinsrb $6, %edx, %xmm1, %xmm1
+; X64-AVX512-NEXT: movq %rcx, %rdx
+; X64-AVX512-NEXT: shrq $56, %rdx
+; X64-AVX512-NEXT: shrq $56, %rax
+; X64-AVX512-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
+; X64-AVX512-NEXT: movl %ecx, %eax
+; X64-AVX512-NEXT: shrl $8, %eax
+; X64-AVX512-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
+; X64-AVX512-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
+; X64-AVX512-NEXT: movl %ecx, %eax
+; X64-AVX512-NEXT: shrl $16, %eax
+; X64-AVX512-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
+; X64-AVX512-NEXT: movl %ecx, %eax
+; X64-AVX512-NEXT: shrl $24, %eax
+; X64-AVX512-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
+; X64-AVX512-NEXT: movq %rcx, %rax
+; X64-AVX512-NEXT: shrq $32, %rax
+; X64-AVX512-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; X64-AVX512-NEXT: movq %rcx, %rax
+; X64-AVX512-NEXT: shrq $48, %rax
+; X64-AVX512-NEXT: shrq $40, %rcx
+; X64-AVX512-NEXT: vpinsrb $13, %ecx, %xmm1, %xmm1
+; X64-AVX512-NEXT: movq 32(%rsi), %rcx
+; X64-AVX512-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
+; X64-AVX512-NEXT: movq %rcx, %rax
+; X64-AVX512-NEXT: vpinsrb $15, %edx, %xmm1, %xmm1
+; X64-AVX512-NEXT: movl %ecx, %edx
+; X64-AVX512-NEXT: shrl $8, %edx
+; X64-AVX512-NEXT: vmovd %ecx, %xmm2
+; X64-AVX512-NEXT: vpinsrb $1, %edx, %xmm2, %xmm2
+; X64-AVX512-NEXT: movl %ecx, %edx
+; X64-AVX512-NEXT: shrl $16, %edx
+; X64-AVX512-NEXT: vpinsrb $2, %edx, %xmm2, %xmm2
+; X64-AVX512-NEXT: movl %ecx, %edx
+; X64-AVX512-NEXT: shrl $24, %edx
+; X64-AVX512-NEXT: vpinsrb $3, %edx, %xmm2, %xmm2
+; X64-AVX512-NEXT: movq %rcx, %rdx
+; X64-AVX512-NEXT: shrq $32, %rdx
+; X64-AVX512-NEXT: vpinsrb $4, %edx, %xmm2, %xmm2
+; X64-AVX512-NEXT: movq %rcx, %rdx
+; X64-AVX512-NEXT: shrq $40, %rcx
+; X64-AVX512-NEXT: vpinsrb $5, %ecx, %xmm2, %xmm2
+; X64-AVX512-NEXT: movq 40(%rsi), %rcx
+; X64-AVX512-NEXT: shrq $48, %rdx
+; X64-AVX512-NEXT: vpinsrb $6, %edx, %xmm2, %xmm2
+; X64-AVX512-NEXT: movq %rcx, %rdx
+; X64-AVX512-NEXT: shrq $56, %rax
+; X64-AVX512-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
+; X64-AVX512-NEXT: movl %ecx, %eax
+; X64-AVX512-NEXT: shrl $8, %eax
+; X64-AVX512-NEXT: vpinsrb $8, %ecx, %xmm2, %xmm2
+; X64-AVX512-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
+; X64-AVX512-NEXT: movl %ecx, %eax
+; X64-AVX512-NEXT: shrl $16, %eax
+; X64-AVX512-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
+; X64-AVX512-NEXT: movl %ecx, %eax
+; X64-AVX512-NEXT: shrl $24, %eax
+; X64-AVX512-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
+; X64-AVX512-NEXT: movq %rcx, %rax
+; X64-AVX512-NEXT: shrq $32, %rax
+; X64-AVX512-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
+; X64-AVX512-NEXT: movq %rcx, %rax
+; X64-AVX512-NEXT: shrq $40, %rcx
+; X64-AVX512-NEXT: vpinsrb $13, %ecx, %xmm2, %xmm2
+; X64-AVX512-NEXT: shrq $48, %rax
+; X64-AVX512-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
+; X64-AVX512-NEXT: shrq $56, %rdx
+; X64-AVX512-NEXT: vpinsrb $15, %edx, %xmm2, %xmm2
+; X64-AVX512-NEXT: vpxor (%rsi), %ymm0, %ymm0
+; X64-AVX512-NEXT: vpxor %ymm2, %ymm1, %ymm1
+; X64-AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT: vptest %ymm0, %ymm0
+; X64-AVX512-NEXT: sete %al
+; X64-AVX512-NEXT: vzeroupper
+; X64-AVX512-NEXT: retq
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 48) nounwind
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length48_lt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length48_lt:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $48
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: shrl $31, %eax
+; X86-NEXT: # kill: def $al killed $al killed $eax
+; X86-NEXT: retl
+;
+; X64-LABEL: length48_lt:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: movl $48, %edx
+; X64-NEXT: callq memcmp
+; X64-NEXT: shrl $31, %eax
+; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 48) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length48_gt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length48_gt:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $48
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: setg %al
+; X86-NEXT: retl
+;
+; X64-LABEL: length48_gt:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: movl $48, %edx
+; X64-NEXT: callq memcmp
+; X64-NEXT: testl %eax, %eax
+; X64-NEXT: setg %al
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 48) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length48_eq_prefer128(i8* %x, i8* %y) nounwind "prefer-vector-width"="128" {
+; X86-LABEL: length48_eq_prefer128:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $48
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: sete %al
+; X86-NEXT: retl
+;
+; X64-LABEL: length48_eq_prefer128:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: movl $48, %edx
+; X64-NEXT: callq memcmp
+; X64-NEXT: testl %eax, %eax
+; X64-NEXT: sete %al
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 48) nounwind
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length48_eq_const(i8* %X) nounwind {
+; X86-LABEL: length48_eq_const:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $48
+; X86-NEXT: pushl $.L.str
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: setne %al
+; X86-NEXT: retl
+;
+; X64-SSE-LABEL: length48_eq_const:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: pushq %rax
+; X64-SSE-NEXT: movl $.L.str, %esi
+; X64-SSE-NEXT: movl $48, %edx
+; X64-SSE-NEXT: callq memcmp
+; X64-SSE-NEXT: testl %eax, %eax
+; X64-SSE-NEXT: setne %al
+; X64-SSE-NEXT: popq %rcx
+; X64-SSE-NEXT: retq
+;
+; X64-AVX1-LABEL: length48_eq_const:
+; X64-AVX1: # %bb.0:
+; X64-AVX1-NEXT: pushq %rax
+; X64-AVX1-NEXT: movl $.L.str, %esi
+; X64-AVX1-NEXT: movl $48, %edx
+; X64-AVX1-NEXT: callq memcmp
+; X64-AVX1-NEXT: testl %eax, %eax
+; X64-AVX1-NEXT: setne %al
+; X64-AVX1-NEXT: popq %rcx
+; X64-AVX1-NEXT: retq
+;
+; X64-AVX2-LABEL: length48_eq_const:
+; X64-AVX2: # %bb.0:
+; X64-AVX2-NEXT: pushq %rbp
+; X64-AVX2-NEXT: pushq %r15
+; X64-AVX2-NEXT: pushq %r14
+; X64-AVX2-NEXT: pushq %r12
+; X64-AVX2-NEXT: pushq %rbx
+; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
+; X64-AVX2-NEXT: movq 40(%rdi), %rcx
+; X64-AVX2-NEXT: movq %rcx, %r8
+; X64-AVX2-NEXT: shrq $56, %r8
+; X64-AVX2-NEXT: movq %rcx, %r9
+; X64-AVX2-NEXT: shrq $48, %r9
+; X64-AVX2-NEXT: movq %rcx, %r10
+; X64-AVX2-NEXT: shrq $32, %r10
+; X64-AVX2-NEXT: movl %ecx, %r11d
+; X64-AVX2-NEXT: shrl $24, %r11d
+; X64-AVX2-NEXT: movl %ecx, %r14d
+; X64-AVX2-NEXT: shrl $16, %r14d
+; X64-AVX2-NEXT: movl %ecx, %r15d
+; X64-AVX2-NEXT: shrl $8, %r15d
+; X64-AVX2-NEXT: movq 32(%rdi), %rdi
+; X64-AVX2-NEXT: movq %rdi, %r12
+; X64-AVX2-NEXT: shrq $56, %r12
+; X64-AVX2-NEXT: movq %rdi, %rbx
+; X64-AVX2-NEXT: shrq $48, %rbx
+; X64-AVX2-NEXT: movq %rdi, %rdx
+; X64-AVX2-NEXT: shrq $32, %rdx
+; X64-AVX2-NEXT: movl %edi, %ebp
+; X64-AVX2-NEXT: shrl $24, %ebp
+; X64-AVX2-NEXT: movl %edi, %esi
+; X64-AVX2-NEXT: shrl $16, %esi
+; X64-AVX2-NEXT: vmovd %edi, %xmm1
+; X64-AVX2-NEXT: movl %edi, %eax
+; X64-AVX2-NEXT: shrl $8, %eax
+; X64-AVX2-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
+; X64-AVX2-NEXT: vpinsrb $2, %esi, %xmm1, %xmm1
+; X64-AVX2-NEXT: vpinsrb $3, %ebp, %xmm1, %xmm1
+; X64-AVX2-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
+; X64-AVX2-NEXT: shrq $40, %rdi
+; X64-AVX2-NEXT: vpinsrb $5, %edi, %xmm1, %xmm1
+; X64-AVX2-NEXT: vpinsrb $6, %ebx, %xmm1, %xmm1
+; X64-AVX2-NEXT: vpinsrb $7, %r12d, %xmm1, %xmm1
+; X64-AVX2-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
+; X64-AVX2-NEXT: vpinsrb $9, %r15d, %xmm1, %xmm1
+; X64-AVX2-NEXT: vpinsrb $10, %r14d, %xmm1, %xmm1
+; X64-AVX2-NEXT: vpinsrb $11, %r11d, %xmm1, %xmm1
+; X64-AVX2-NEXT: vpinsrb $12, %r10d, %xmm1, %xmm1
+; X64-AVX2-NEXT: shrq $40, %rcx
+; X64-AVX2-NEXT: vpinsrb $13, %ecx, %xmm1, %xmm1
+; X64-AVX2-NEXT: vpinsrb $14, %r9d, %xmm1, %xmm1
+; X64-AVX2-NEXT: vpinsrb $15, %r8d, %xmm1, %xmm1
+; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %ymm1, %ymm1
+; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %ymm0, %ymm0
+; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT: vptest %ymm0, %ymm0
+; X64-AVX2-NEXT: setne %al
+; X64-AVX2-NEXT: popq %rbx
+; X64-AVX2-NEXT: popq %r12
+; X64-AVX2-NEXT: popq %r14
+; X64-AVX2-NEXT: popq %r15
+; X64-AVX2-NEXT: popq %rbp
+; X64-AVX2-NEXT: vzeroupper
+; X64-AVX2-NEXT: retq
+;
+; X64-AVX512-LABEL: length48_eq_const:
+; X64-AVX512: # %bb.0:
+; X64-AVX512-NEXT: pushq %rbp
+; X64-AVX512-NEXT: pushq %r15
+; X64-AVX512-NEXT: pushq %r14
+; X64-AVX512-NEXT: pushq %r12
+; X64-AVX512-NEXT: pushq %rbx
+; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0
+; X64-AVX512-NEXT: movq 40(%rdi), %rcx
+; X64-AVX512-NEXT: movq %rcx, %r8
+; X64-AVX512-NEXT: shrq $56, %r8
+; X64-AVX512-NEXT: movq %rcx, %r9
+; X64-AVX512-NEXT: shrq $48, %r9
+; X64-AVX512-NEXT: movq %rcx, %r10
+; X64-AVX512-NEXT: shrq $32, %r10
+; X64-AVX512-NEXT: movl %ecx, %r11d
+; X64-AVX512-NEXT: shrl $24, %r11d
+; X64-AVX512-NEXT: movl %ecx, %r14d
+; X64-AVX512-NEXT: shrl $16, %r14d
+; X64-AVX512-NEXT: movl %ecx, %r15d
+; X64-AVX512-NEXT: shrl $8, %r15d
+; X64-AVX512-NEXT: movq 32(%rdi), %rdi
+; X64-AVX512-NEXT: movq %rdi, %r12
+; X64-AVX512-NEXT: shrq $56, %r12
+; X64-AVX512-NEXT: movq %rdi, %rbx
+; X64-AVX512-NEXT: shrq $48, %rbx
+; X64-AVX512-NEXT: movq %rdi, %rdx
+; X64-AVX512-NEXT: shrq $32, %rdx
+; X64-AVX512-NEXT: movl %edi, %ebp
+; X64-AVX512-NEXT: shrl $24, %ebp
+; X64-AVX512-NEXT: movl %edi, %esi
+; X64-AVX512-NEXT: shrl $16, %esi
+; X64-AVX512-NEXT: vmovd %edi, %xmm1
+; X64-AVX512-NEXT: movl %edi, %eax
+; X64-AVX512-NEXT: shrl $8, %eax
+; X64-AVX512-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
+; X64-AVX512-NEXT: vpinsrb $2, %esi, %xmm1, %xmm1
+; X64-AVX512-NEXT: vpinsrb $3, %ebp, %xmm1, %xmm1
+; X64-AVX512-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
+; X64-AVX512-NEXT: shrq $40, %rdi
+; X64-AVX512-NEXT: vpinsrb $5, %edi, %xmm1, %xmm1
+; X64-AVX512-NEXT: vpinsrb $6, %ebx, %xmm1, %xmm1
+; X64-AVX512-NEXT: vpinsrb $7, %r12d, %xmm1, %xmm1
+; X64-AVX512-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
+; X64-AVX512-NEXT: vpinsrb $9, %r15d, %xmm1, %xmm1
+; X64-AVX512-NEXT: vpinsrb $10, %r14d, %xmm1, %xmm1
+; X64-AVX512-NEXT: vpinsrb $11, %r11d, %xmm1, %xmm1
+; X64-AVX512-NEXT: vpinsrb $12, %r10d, %xmm1, %xmm1
+; X64-AVX512-NEXT: shrq $40, %rcx
+; X64-AVX512-NEXT: vpinsrb $13, %ecx, %xmm1, %xmm1
+; X64-AVX512-NEXT: vpinsrb $14, %r9d, %xmm1, %xmm1
+; X64-AVX512-NEXT: vpinsrb $15, %r8d, %xmm1, %xmm1
+; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %ymm1, %ymm1
+; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %ymm0, %ymm0
+; X64-AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT: vptest %ymm0, %ymm0
+; X64-AVX512-NEXT: setne %al
+; X64-AVX512-NEXT: popq %rbx
+; X64-AVX512-NEXT: popq %r12
+; X64-AVX512-NEXT: popq %r14
+; X64-AVX512-NEXT: popq %r15
+; X64-AVX512-NEXT: popq %rbp
+; X64-AVX512-NEXT: vzeroupper
+; X64-AVX512-NEXT: retq
+ %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 48) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length63(i8* %X, i8* %Y) nounwind {
+; X86-LABEL: length63:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $63
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: retl
+;
+; X64-LABEL: length63:
+; X64: # %bb.0:
+; X64-NEXT: movl $63, %edx
+; X64-NEXT: jmp memcmp # TAILCALL
+ %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 63) nounwind
+ ret i32 %m
+}
+
+define i1 @length63_eq(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length63_eq:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $63
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: setne %al
+; X86-NEXT: retl
+;
+; X64-SSE-LABEL: length63_eq:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: pushq %rax
+; X64-SSE-NEXT: movl $63, %edx
+; X64-SSE-NEXT: callq memcmp
+; X64-SSE-NEXT: testl %eax, %eax
+; X64-SSE-NEXT: setne %al
+; X64-SSE-NEXT: popq %rcx
+; X64-SSE-NEXT: retq
+;
+; X64-AVX1-LABEL: length63_eq:
+; X64-AVX1: # %bb.0:
+; X64-AVX1-NEXT: pushq %rax
+; X64-AVX1-NEXT: movl $63, %edx
+; X64-AVX1-NEXT: callq memcmp
+; X64-AVX1-NEXT: testl %eax, %eax
+; X64-AVX1-NEXT: setne %al
+; X64-AVX1-NEXT: popq %rcx
+; X64-AVX1-NEXT: retq
+;
+; X64-AVX2-LABEL: length63_eq:
+; X64-AVX2: # %bb.0:
+; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
+; X64-AVX2-NEXT: vmovdqu 31(%rdi), %ymm1
+; X64-AVX2-NEXT: vpxor 31(%rsi), %ymm1, %ymm1
+; X64-AVX2-NEXT: vpxor (%rsi), %ymm0, %ymm0
+; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT: vptest %ymm0, %ymm0
+; X64-AVX2-NEXT: setne %al
+; X64-AVX2-NEXT: vzeroupper
+; X64-AVX2-NEXT: retq
+;
+; X64-AVX512-LABEL: length63_eq:
+; X64-AVX512: # %bb.0:
+; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0
+; X64-AVX512-NEXT: vmovdqu 31(%rdi), %ymm1
+; X64-AVX512-NEXT: vpxor 31(%rsi), %ymm1, %ymm1
+; X64-AVX512-NEXT: vpxor (%rsi), %ymm0, %ymm0
+; X64-AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT: vptest %ymm0, %ymm0
+; X64-AVX512-NEXT: setne %al
+; X64-AVX512-NEXT: vzeroupper
+; X64-AVX512-NEXT: retq
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 63) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
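length63_eq is the first of the power-of-two-minus-one cases added to verify overlapping loads: on both AVX2 and AVX-512 the second 32-byte vector is loaded from offset 31, so byte 31 is covered twice and no scalar tail is needed. The same dataflow in C, assuming AVX2 intrinsics (a sketch, not from this commit):

  #include <immintrin.h>
  #include <stdbool.h>

  /* 63 bytes via two overlapping 32-byte loads (offsets 0 and 31). */
  static bool memeq63(const unsigned char *x, const unsigned char *y) {
    __m256i x0 = _mm256_loadu_si256((const __m256i *)x);
    __m256i x1 = _mm256_loadu_si256((const __m256i *)(x + 31));
    __m256i y0 = _mm256_loadu_si256((const __m256i *)y);
    __m256i y1 = _mm256_loadu_si256((const __m256i *)(y + 31));
    __m256i d  = _mm256_or_si256(_mm256_xor_si256(x0, y0), _mm256_xor_si256(x1, y1));
    return _mm256_testz_si256(d, d);
  }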
+define i1 @length63_lt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length63_lt:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $63
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: shrl $31, %eax
+; X86-NEXT: # kill: def $al killed $al killed $eax
+; X86-NEXT: retl
+;
+; X64-LABEL: length63_lt:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: movl $63, %edx
+; X64-NEXT: callq memcmp
+; X64-NEXT: shrl $31, %eax
+; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 63) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length63_gt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length63_gt:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $63
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: setg %al
+; X86-NEXT: retl
+;
+; X64-LABEL: length63_gt:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: movl $63, %edx
+; X64-NEXT: callq memcmp
+; X64-NEXT: testl %eax, %eax
+; X64-NEXT: setg %al
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 63) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length63_eq_const(i8* %X) nounwind {
+; X86-LABEL: length63_eq_const:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $63
+; X86-NEXT: pushl $.L.str
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: sete %al
+; X86-NEXT: retl
+;
+; X64-SSE-LABEL: length63_eq_const:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: pushq %rax
+; X64-SSE-NEXT: movl $.L.str, %esi
+; X64-SSE-NEXT: movl $63, %edx
+; X64-SSE-NEXT: callq memcmp
+; X64-SSE-NEXT: testl %eax, %eax
+; X64-SSE-NEXT: sete %al
+; X64-SSE-NEXT: popq %rcx
+; X64-SSE-NEXT: retq
+;
+; X64-AVX1-LABEL: length63_eq_const:
+; X64-AVX1: # %bb.0:
+; X64-AVX1-NEXT: pushq %rax
+; X64-AVX1-NEXT: movl $.L.str, %esi
+; X64-AVX1-NEXT: movl $63, %edx
+; X64-AVX1-NEXT: callq memcmp
+; X64-AVX1-NEXT: testl %eax, %eax
+; X64-AVX1-NEXT: sete %al
+; X64-AVX1-NEXT: popq %rcx
+; X64-AVX1-NEXT: retq
+;
+; X64-AVX2-LABEL: length63_eq_const:
+; X64-AVX2: # %bb.0:
+; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
+; X64-AVX2-NEXT: vmovdqu 31(%rdi), %ymm1
+; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %ymm1, %ymm1
+; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %ymm0, %ymm0
+; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT: vptest %ymm0, %ymm0
+; X64-AVX2-NEXT: sete %al
+; X64-AVX2-NEXT: vzeroupper
+; X64-AVX2-NEXT: retq
+;
+; X64-AVX512-LABEL: length63_eq_const:
+; X64-AVX512: # %bb.0:
+; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm0
+; X64-AVX512-NEXT: vmovdqu 31(%rdi), %ymm1
+; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %ymm1, %ymm1
+; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %ymm0, %ymm0
+; X64-AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT: vptest %ymm0, %ymm0
+; X64-AVX512-NEXT: sete %al
+; X64-AVX512-NEXT: vzeroupper
+; X64-AVX512-NEXT: retq
+ %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 63) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length64(i8* %X, i8* %Y) nounwind {
+; X86-LABEL: length64:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $64
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: retl
+;
+; X64-LABEL: length64:
+; X64: # %bb.0:
+; X64-NEXT: movl $64, %edx
+; X64-NEXT: jmp memcmp # TAILCALL
+ %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 64) nounwind
+ ret i32 %m
+}
+
+define i1 @length64_eq(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length64_eq:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $64
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: setne %al
+; X86-NEXT: retl
+;
+; X64-SSE-LABEL: length64_eq:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: pushq %rax
+; X64-SSE-NEXT: movl $64, %edx
+; X64-SSE-NEXT: callq memcmp
+; X64-SSE-NEXT: testl %eax, %eax
+; X64-SSE-NEXT: setne %al
+; X64-SSE-NEXT: popq %rcx
+; X64-SSE-NEXT: retq
+;
+; X64-AVX1-LABEL: length64_eq:
+; X64-AVX1: # %bb.0:
+; X64-AVX1-NEXT: pushq %rax
+; X64-AVX1-NEXT: movl $64, %edx
+; X64-AVX1-NEXT: callq memcmp
+; X64-AVX1-NEXT: testl %eax, %eax
+; X64-AVX1-NEXT: setne %al
+; X64-AVX1-NEXT: popq %rcx
+; X64-AVX1-NEXT: retq
+;
+; X64-AVX2-LABEL: length64_eq:
+; X64-AVX2: # %bb.0:
+; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
+; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1
+; X64-AVX2-NEXT: vpxor 32(%rsi), %ymm1, %ymm1
+; X64-AVX2-NEXT: vpxor (%rsi), %ymm0, %ymm0
+; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT: vptest %ymm0, %ymm0
+; X64-AVX2-NEXT: setne %al
+; X64-AVX2-NEXT: vzeroupper
+; X64-AVX2-NEXT: retq
+;
+; X64-AVX512F-LABEL: length64_eq:
+; X64-AVX512F: # %bb.0:
+; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
+; X64-AVX512F-NEXT: vpcmpeqd (%rsi), %zmm0, %k0
+; X64-AVX512F-NEXT: kortestw %k0, %k0
+; X64-AVX512F-NEXT: setae %al
+; X64-AVX512F-NEXT: vzeroupper
+; X64-AVX512F-NEXT: retq
+;
+; X64-AVX512BW-LABEL: length64_eq:
+; X64-AVX512BW: # %bb.0:
+; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0
+; X64-AVX512BW-NEXT: vpcmpeqb (%rsi), %zmm0, %k0
+; X64-AVX512BW-NEXT: kortestq %k0, %k0
+; X64-AVX512BW-NEXT: setae %al
+; X64-AVX512BW-NEXT: vzeroupper
+; X64-AVX512BW-NEXT: retq
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 64) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length64_lt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length64_lt:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $64
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: shrl $31, %eax
+; X86-NEXT: # kill: def $al killed $al killed $eax
+; X86-NEXT: retl
+;
+; X64-LABEL: length64_lt:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: movl $64, %edx
+; X64-NEXT: callq memcmp
+; X64-NEXT: shrl $31, %eax
+; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 64) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length64_gt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length64_gt:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $64
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: setg %al
+; X86-NEXT: retl
+;
+; X64-LABEL: length64_gt:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: movl $64, %edx
+; X64-NEXT: callq memcmp
+; X64-NEXT: testl %eax, %eax
+; X64-NEXT: setg %al
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 64) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length64_eq_const(i8* %X) nounwind {
+; X86-LABEL: length64_eq_const:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $64
+; X86-NEXT: pushl $.L.str
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: sete %al
+; X86-NEXT: retl
+;
+; X64-SSE-LABEL: length64_eq_const:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: pushq %rax
+; X64-SSE-NEXT: movl $.L.str, %esi
+; X64-SSE-NEXT: movl $64, %edx
+; X64-SSE-NEXT: callq memcmp
+; X64-SSE-NEXT: testl %eax, %eax
+; X64-SSE-NEXT: sete %al
+; X64-SSE-NEXT: popq %rcx
+; X64-SSE-NEXT: retq
+;
+; X64-AVX1-LABEL: length64_eq_const:
+; X64-AVX1: # %bb.0:
+; X64-AVX1-NEXT: pushq %rax
+; X64-AVX1-NEXT: movl $.L.str, %esi
+; X64-AVX1-NEXT: movl $64, %edx
+; X64-AVX1-NEXT: callq memcmp
+; X64-AVX1-NEXT: testl %eax, %eax
+; X64-AVX1-NEXT: sete %al
+; X64-AVX1-NEXT: popq %rcx
+; X64-AVX1-NEXT: retq
+;
+; X64-AVX2-LABEL: length64_eq_const:
+; X64-AVX2: # %bb.0:
+; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
+; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1
+; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %ymm1, %ymm1
+; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %ymm0, %ymm0
+; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT: vptest %ymm0, %ymm0
+; X64-AVX2-NEXT: sete %al
+; X64-AVX2-NEXT: vzeroupper
+; X64-AVX2-NEXT: retq
+;
+; X64-AVX512F-LABEL: length64_eq_const:
+; X64-AVX512F: # %bb.0:
+; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
+; X64-AVX512F-NEXT: vpcmpeqd {{.*}}(%rip), %zmm0, %k0
+; X64-AVX512F-NEXT: kortestw %k0, %k0
+; X64-AVX512F-NEXT: setb %al
+; X64-AVX512F-NEXT: vzeroupper
+; X64-AVX512F-NEXT: retq
+;
+; X64-AVX512BW-LABEL: length64_eq_const:
+; X64-AVX512BW: # %bb.0:
+; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0
+; X64-AVX512BW-NEXT: vpcmpeqb {{.*}}(%rip), %zmm0, %k0
+; X64-AVX512BW-NEXT: kortestq %k0, %k0
+; X64-AVX512BW-NEXT: setb %al
+; X64-AVX512BW-NEXT: vzeroupper
+; X64-AVX512BW-NEXT: retq
+ %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 64) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length96(i8* %X, i8* %Y) nounwind {
+; X86-LABEL: length96:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $96
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: retl
+;
+; X64-LABEL: length96:
+; X64: # %bb.0:
+; X64-NEXT: movl $96, %edx
+; X64-NEXT: jmp memcmp # TAILCALL
+ %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 96) nounwind
+ ret i32 %m
+}
+
+define i1 @length96_eq(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length96_eq:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $96
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: setne %al
+; X86-NEXT: retl
+;
+; X64-SSE-LABEL: length96_eq:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: pushq %rax
+; X64-SSE-NEXT: movl $96, %edx
+; X64-SSE-NEXT: callq memcmp
+; X64-SSE-NEXT: testl %eax, %eax
+; X64-SSE-NEXT: setne %al
+; X64-SSE-NEXT: popq %rcx
+; X64-SSE-NEXT: retq
+;
+; X64-AVX1-LABEL: length96_eq:
+; X64-AVX1: # %bb.0:
+; X64-AVX1-NEXT: pushq %rax
+; X64-AVX1-NEXT: movl $96, %edx
+; X64-AVX1-NEXT: callq memcmp
+; X64-AVX1-NEXT: testl %eax, %eax
+; X64-AVX1-NEXT: setne %al
+; X64-AVX1-NEXT: popq %rcx
+; X64-AVX1-NEXT: retq
+;
+; X64-AVX2-LABEL: length96_eq:
+; X64-AVX2: # %bb.0:
+; X64-AVX2-NEXT: pushq %rax
+; X64-AVX2-NEXT: movl $96, %edx
+; X64-AVX2-NEXT: callq memcmp
+; X64-AVX2-NEXT: testl %eax, %eax
+; X64-AVX2-NEXT: setne %al
+; X64-AVX2-NEXT: popq %rcx
+; X64-AVX2-NEXT: retq
+;
+; X64-AVX512F-LABEL: length96_eq:
+; X64-AVX512F: # %bb.0:
+; X64-AVX512F-NEXT: movq 80(%rdi), %rax
+; X64-AVX512F-NEXT: vmovd %eax, %xmm0
+; X64-AVX512F-NEXT: shrq $32, %rax
+; X64-AVX512F-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
+; X64-AVX512F-NEXT: movq 88(%rdi), %rax
+; X64-AVX512F-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0
+; X64-AVX512F-NEXT: shrq $32, %rax
+; X64-AVX512F-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
+; X64-AVX512F-NEXT: movq 64(%rdi), %rax
+; X64-AVX512F-NEXT: vmovd %eax, %xmm1
+; X64-AVX512F-NEXT: shrq $32, %rax
+; X64-AVX512F-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
+; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm2
+; X64-AVX512F-NEXT: movq 72(%rdi), %rax
+; X64-AVX512F-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
+; X64-AVX512F-NEXT: shrq $32, %rax
+; X64-AVX512F-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1
+; X64-AVX512F-NEXT: movq 80(%rsi), %rax
+; X64-AVX512F-NEXT: vmovd %eax, %xmm3
+; X64-AVX512F-NEXT: shrq $32, %rax
+; X64-AVX512F-NEXT: vpinsrd $1, %eax, %xmm3, %xmm3
+; X64-AVX512F-NEXT: movq 88(%rsi), %rax
+; X64-AVX512F-NEXT: vpinsrd $2, %eax, %xmm3, %xmm3
+; X64-AVX512F-NEXT: shrq $32, %rax
+; X64-AVX512F-NEXT: vpinsrd $3, %eax, %xmm3, %xmm3
+; X64-AVX512F-NEXT: movq 64(%rsi), %rax
+; X64-AVX512F-NEXT: vmovd %eax, %xmm4
+; X64-AVX512F-NEXT: shrq $32, %rax
+; X64-AVX512F-NEXT: vpinsrd $1, %eax, %xmm4, %xmm4
+; X64-AVX512F-NEXT: movq 72(%rsi), %rax
+; X64-AVX512F-NEXT: vpinsrd $2, %eax, %xmm4, %xmm4
+; X64-AVX512F-NEXT: shrq $32, %rax
+; X64-AVX512F-NEXT: vpinsrd $3, %eax, %xmm4, %xmm4
+; X64-AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
+; X64-AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm4, %ymm1
+; X64-AVX512F-NEXT: vpcmpeqd %zmm1, %zmm0, %k1
+; X64-AVX512F-NEXT: vpcmpeqd (%rsi), %zmm2, %k0 {%k1}
+; X64-AVX512F-NEXT: kortestw %k0, %k0
+; X64-AVX512F-NEXT: setae %al
+; X64-AVX512F-NEXT: vzeroupper
+; X64-AVX512F-NEXT: retq
+;
+; X64-AVX512BW-LABEL: length96_eq:
+; X64-AVX512BW: # %bb.0:
+; X64-AVX512BW-NEXT: movq 80(%rdi), %rcx
+; X64-AVX512BW-NEXT: movq %rcx, %rax
+; X64-AVX512BW-NEXT: movl %ecx, %edx
+; X64-AVX512BW-NEXT: shrl $8, %edx
+; X64-AVX512BW-NEXT: vmovd %ecx, %xmm0
+; X64-AVX512BW-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
+; X64-AVX512BW-NEXT: movl %ecx, %edx
+; X64-AVX512BW-NEXT: shrl $16, %edx
+; X64-AVX512BW-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; X64-AVX512BW-NEXT: movl %ecx, %edx
+; X64-AVX512BW-NEXT: shrl $24, %edx
+; X64-AVX512BW-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; X64-AVX512BW-NEXT: movq %rcx, %rdx
+; X64-AVX512BW-NEXT: shrq $32, %rdx
+; X64-AVX512BW-NEXT: vpinsrb $4, %edx, %xmm0, %xmm0
+; X64-AVX512BW-NEXT: movq %rcx, %rdx
+; X64-AVX512BW-NEXT: shrq $40, %rcx
+; X64-AVX512BW-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
+; X64-AVX512BW-NEXT: movq 88(%rdi), %rcx
+; X64-AVX512BW-NEXT: shrq $48, %rdx
+; X64-AVX512BW-NEXT: vpinsrb $6, %edx, %xmm0, %xmm0
+; X64-AVX512BW-NEXT: movq %rcx, %rdx
+; X64-AVX512BW-NEXT: shrq $56, %rax
+; X64-AVX512BW-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; X64-AVX512BW-NEXT: movl %ecx, %eax
+; X64-AVX512BW-NEXT: shrl $8, %eax
+; X64-AVX512BW-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
+; X64-AVX512BW-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
+; X64-AVX512BW-NEXT: movl %ecx, %eax
+; X64-AVX512BW-NEXT: shrl $16, %eax
+; X64-AVX512BW-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
+; X64-AVX512BW-NEXT: movl %ecx, %eax
+; X64-AVX512BW-NEXT: shrl $24, %eax
+; X64-AVX512BW-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; X64-AVX512BW-NEXT: movq %rcx, %rax
+; X64-AVX512BW-NEXT: shrq $32, %rax
+; X64-AVX512BW-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
+; X64-AVX512BW-NEXT: movq %rcx, %rax
+; X64-AVX512BW-NEXT: shrq $40, %rcx
+; X64-AVX512BW-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
+; X64-AVX512BW-NEXT: movq 64(%rdi), %rcx
+; X64-AVX512BW-NEXT: shrq $48, %rax
+; X64-AVX512BW-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
+; X64-AVX512BW-NEXT: movq %rcx, %rax
+; X64-AVX512BW-NEXT: shrq $56, %rdx
+; X64-AVX512BW-NEXT: vpinsrb $15, %edx, %xmm0, %xmm0
+; X64-AVX512BW-NEXT: movl %ecx, %edx
+; X64-AVX512BW-NEXT: shrl $8, %edx
+; X64-AVX512BW-NEXT: vmovd %ecx, %xmm1
+; X64-AVX512BW-NEXT: vpinsrb $1, %edx, %xmm1, %xmm1
+; X64-AVX512BW-NEXT: movl %ecx, %edx
+; X64-AVX512BW-NEXT: shrl $16, %edx
+; X64-AVX512BW-NEXT: vpinsrb $2, %edx, %xmm1, %xmm1
+; X64-AVX512BW-NEXT: movl %ecx, %edx
+; X64-AVX512BW-NEXT: shrl $24, %edx
+; X64-AVX512BW-NEXT: vpinsrb $3, %edx, %xmm1, %xmm1
+; X64-AVX512BW-NEXT: movq %rcx, %rdx
+; X64-AVX512BW-NEXT: shrq $32, %rdx
+; X64-AVX512BW-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
+; X64-AVX512BW-NEXT: movq %rcx, %rdx
+; X64-AVX512BW-NEXT: shrq $40, %rcx
+; X64-AVX512BW-NEXT: vpinsrb $5, %ecx, %xmm1, %xmm2
+; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm1
+; X64-AVX512BW-NEXT: movq 72(%rdi), %rcx
+; X64-AVX512BW-NEXT: shrq $48, %rdx
+; X64-AVX512BW-NEXT: vpinsrb $6, %edx, %xmm2, %xmm2
+; X64-AVX512BW-NEXT: movq %rcx, %rdx
+; X64-AVX512BW-NEXT: shrq $56, %rdx
+; X64-AVX512BW-NEXT: shrq $56, %rax
+; X64-AVX512BW-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
+; X64-AVX512BW-NEXT: movl %ecx, %eax
+; X64-AVX512BW-NEXT: shrl $8, %eax
+; X64-AVX512BW-NEXT: vpinsrb $8, %ecx, %xmm2, %xmm2
+; X64-AVX512BW-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
+; X64-AVX512BW-NEXT: movl %ecx, %eax
+; X64-AVX512BW-NEXT: shrl $16, %eax
+; X64-AVX512BW-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
+; X64-AVX512BW-NEXT: movl %ecx, %eax
+; X64-AVX512BW-NEXT: shrl $24, %eax
+; X64-AVX512BW-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
+; X64-AVX512BW-NEXT: movq %rcx, %rax
+; X64-AVX512BW-NEXT: shrq $32, %rax
+; X64-AVX512BW-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
+; X64-AVX512BW-NEXT: movq %rcx, %rax
+; X64-AVX512BW-NEXT: shrq $48, %rax
+; X64-AVX512BW-NEXT: shrq $40, %rcx
+; X64-AVX512BW-NEXT: vpinsrb $13, %ecx, %xmm2, %xmm2
+; X64-AVX512BW-NEXT: movq 80(%rsi), %rcx
+; X64-AVX512BW-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
+; X64-AVX512BW-NEXT: movq %rcx, %rax
+; X64-AVX512BW-NEXT: vpinsrb $15, %edx, %xmm2, %xmm2
+; X64-AVX512BW-NEXT: movl %ecx, %edx
+; X64-AVX512BW-NEXT: shrl $8, %edx
+; X64-AVX512BW-NEXT: vmovd %ecx, %xmm3
+; X64-AVX512BW-NEXT: vpinsrb $1, %edx, %xmm3, %xmm3
+; X64-AVX512BW-NEXT: movl %ecx, %edx
+; X64-AVX512BW-NEXT: shrl $16, %edx
+; X64-AVX512BW-NEXT: vpinsrb $2, %edx, %xmm3, %xmm3
+; X64-AVX512BW-NEXT: movl %ecx, %edx
+; X64-AVX512BW-NEXT: shrl $24, %edx
+; X64-AVX512BW-NEXT: vpinsrb $3, %edx, %xmm3, %xmm3
+; X64-AVX512BW-NEXT: movq %rcx, %rdx
+; X64-AVX512BW-NEXT: shrq $32, %rdx
+; X64-AVX512BW-NEXT: vpinsrb $4, %edx, %xmm3, %xmm3
+; X64-AVX512BW-NEXT: movq %rcx, %rdx
+; X64-AVX512BW-NEXT: shrq $40, %rcx
+; X64-AVX512BW-NEXT: vpinsrb $5, %ecx, %xmm3, %xmm3
+; X64-AVX512BW-NEXT: movq 88(%rsi), %rcx
+; X64-AVX512BW-NEXT: shrq $48, %rdx
+; X64-AVX512BW-NEXT: vpinsrb $6, %edx, %xmm3, %xmm3
+; X64-AVX512BW-NEXT: movq %rcx, %rdx
+; X64-AVX512BW-NEXT: shrq $56, %rax
+; X64-AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
+; X64-AVX512BW-NEXT: movl %ecx, %eax
+; X64-AVX512BW-NEXT: shrl $8, %eax
+; X64-AVX512BW-NEXT: vpinsrb $8, %ecx, %xmm3, %xmm3
+; X64-AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
+; X64-AVX512BW-NEXT: movl %ecx, %eax
+; X64-AVX512BW-NEXT: shrl $16, %eax
+; X64-AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
+; X64-AVX512BW-NEXT: movl %ecx, %eax
+; X64-AVX512BW-NEXT: shrl $24, %eax
+; X64-AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
+; X64-AVX512BW-NEXT: movq %rcx, %rax
+; X64-AVX512BW-NEXT: shrq $32, %rax
+; X64-AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
+; X64-AVX512BW-NEXT: movq %rcx, %rax
+; X64-AVX512BW-NEXT: shrq $40, %rcx
+; X64-AVX512BW-NEXT: vpinsrb $13, %ecx, %xmm3, %xmm3
+; X64-AVX512BW-NEXT: movq 64(%rsi), %rcx
+; X64-AVX512BW-NEXT: shrq $48, %rax
+; X64-AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
+; X64-AVX512BW-NEXT: movq %rcx, %rax
+; X64-AVX512BW-NEXT: shrq $56, %rdx
+; X64-AVX512BW-NEXT: vpinsrb $15, %edx, %xmm3, %xmm3
+; X64-AVX512BW-NEXT: movl %ecx, %edx
+; X64-AVX512BW-NEXT: shrl $8, %edx
+; X64-AVX512BW-NEXT: vmovd %ecx, %xmm4
+; X64-AVX512BW-NEXT: vpinsrb $1, %edx, %xmm4, %xmm4
+; X64-AVX512BW-NEXT: movl %ecx, %edx
+; X64-AVX512BW-NEXT: shrl $16, %edx
+; X64-AVX512BW-NEXT: vpinsrb $2, %edx, %xmm4, %xmm4
+; X64-AVX512BW-NEXT: movl %ecx, %edx
+; X64-AVX512BW-NEXT: shrl $24, %edx
+; X64-AVX512BW-NEXT: vpinsrb $3, %edx, %xmm4, %xmm4
+; X64-AVX512BW-NEXT: movq %rcx, %rdx
+; X64-AVX512BW-NEXT: shrq $32, %rdx
+; X64-AVX512BW-NEXT: vpinsrb $4, %edx, %xmm4, %xmm4
+; X64-AVX512BW-NEXT: movq %rcx, %rdx
+; X64-AVX512BW-NEXT: shrq $40, %rcx
+; X64-AVX512BW-NEXT: vpinsrb $5, %ecx, %xmm4, %xmm4
+; X64-AVX512BW-NEXT: movq 72(%rsi), %rcx
+; X64-AVX512BW-NEXT: shrq $48, %rdx
+; X64-AVX512BW-NEXT: vpinsrb $6, %edx, %xmm4, %xmm4
+; X64-AVX512BW-NEXT: movq %rcx, %rdx
+; X64-AVX512BW-NEXT: shrq $56, %rax
+; X64-AVX512BW-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4
+; X64-AVX512BW-NEXT: movl %ecx, %eax
+; X64-AVX512BW-NEXT: shrl $8, %eax
+; X64-AVX512BW-NEXT: vpinsrb $8, %ecx, %xmm4, %xmm4
+; X64-AVX512BW-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4
+; X64-AVX512BW-NEXT: movl %ecx, %eax
+; X64-AVX512BW-NEXT: shrl $16, %eax
+; X64-AVX512BW-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4
+; X64-AVX512BW-NEXT: movl %ecx, %eax
+; X64-AVX512BW-NEXT: shrl $24, %eax
+; X64-AVX512BW-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4
+; X64-AVX512BW-NEXT: movq %rcx, %rax
+; X64-AVX512BW-NEXT: shrq $32, %rax
+; X64-AVX512BW-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4
+; X64-AVX512BW-NEXT: movq %rcx, %rax
+; X64-AVX512BW-NEXT: shrq $40, %rcx
+; X64-AVX512BW-NEXT: vpinsrb $13, %ecx, %xmm4, %xmm4
+; X64-AVX512BW-NEXT: shrq $48, %rax
+; X64-AVX512BW-NEXT: vpinsrb $14, %eax, %xmm4, %xmm4
+; X64-AVX512BW-NEXT: shrq $56, %rdx
+; X64-AVX512BW-NEXT: vpinsrb $15, %edx, %xmm4, %xmm4
+; X64-AVX512BW-NEXT: vinserti128 $1, %xmm0, %ymm2, %ymm0
+; X64-AVX512BW-NEXT: vinserti128 $1, %xmm3, %ymm4, %ymm2
+; X64-AVX512BW-NEXT: vpcmpeqb %zmm2, %zmm0, %k1
+; X64-AVX512BW-NEXT: vpcmpeqb (%rsi), %zmm1, %k0 {%k1}
+; X64-AVX512BW-NEXT: kortestq %k0, %k0
+; X64-AVX512BW-NEXT: setae %al
+; X64-AVX512BW-NEXT: vzeroupper
+; X64-AVX512BW-NEXT: retq
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 96) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
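Worth noting what the new 96-byte coverage exposes: in both AVX-512 runs the trailing 32 bytes are reassembled from scalar loads through long shr/vpinsr chains before the masked zmm compare, rather than being loaded as a vector. For comparison, a tighter form of the same compare, assuming AVX512BW and VL intrinsics (hypothetical, not what the checks above show):

  #include <immintrin.h>
  #include <stdbool.h>

  /* 96 bytes = one 64-byte lane + one 32-byte lane, both loaded directly. */
  static bool memeq96(const unsigned char *x, const unsigned char *y) {
    __mmask64 k0 = _mm512_cmpeq_epi8_mask(_mm512_loadu_si512(x),
                                          _mm512_loadu_si512(y));
    __mmask32 k1 = _mm256_cmpeq_epi8_mask(
        _mm256_loadu_si256((const __m256i *)(x + 64)),
        _mm256_loadu_si256((const __m256i *)(y + 64)));
    return k0 == ~(__mmask64)0 && k1 == ~(__mmask32)0;
  }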
+define i1 @length96_lt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length96_lt:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $96
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: shrl $31, %eax
+; X86-NEXT: # kill: def $al killed $al killed $eax
+; X86-NEXT: retl
+;
+; X64-LABEL: length96_lt:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: movl $96, %edx
+; X64-NEXT: callq memcmp
+; X64-NEXT: shrl $31, %eax
+; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 96) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length96_gt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length96_gt:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $96
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: setg %al
+; X86-NEXT: retl
+;
+; X64-LABEL: length96_gt:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: movl $96, %edx
+; X64-NEXT: callq memcmp
+; X64-NEXT: testl %eax, %eax
+; X64-NEXT: setg %al
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 96) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length96_eq_const(i8* %X) nounwind {
+; X86-LABEL: length96_eq_const:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $96
+; X86-NEXT: pushl $.L.str
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: sete %al
+; X86-NEXT: retl
+;
+; X64-SSE-LABEL: length96_eq_const:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: pushq %rax
+; X64-SSE-NEXT: movl $.L.str, %esi
+; X64-SSE-NEXT: movl $96, %edx
+; X64-SSE-NEXT: callq memcmp
+; X64-SSE-NEXT: testl %eax, %eax
+; X64-SSE-NEXT: sete %al
+; X64-SSE-NEXT: popq %rcx
+; X64-SSE-NEXT: retq
+;
+; X64-AVX1-LABEL: length96_eq_const:
+; X64-AVX1: # %bb.0:
+; X64-AVX1-NEXT: pushq %rax
+; X64-AVX1-NEXT: movl $.L.str, %esi
+; X64-AVX1-NEXT: movl $96, %edx
+; X64-AVX1-NEXT: callq memcmp
+; X64-AVX1-NEXT: testl %eax, %eax
+; X64-AVX1-NEXT: sete %al
+; X64-AVX1-NEXT: popq %rcx
+; X64-AVX1-NEXT: retq
+;
+; X64-AVX2-LABEL: length96_eq_const:
+; X64-AVX2: # %bb.0:
+; X64-AVX2-NEXT: pushq %rax
+; X64-AVX2-NEXT: movl $.L.str, %esi
+; X64-AVX2-NEXT: movl $96, %edx
+; X64-AVX2-NEXT: callq memcmp
+; X64-AVX2-NEXT: testl %eax, %eax
+; X64-AVX2-NEXT: sete %al
+; X64-AVX2-NEXT: popq %rcx
+; X64-AVX2-NEXT: retq
+;
+; X64-AVX512F-LABEL: length96_eq_const:
+; X64-AVX512F: # %bb.0:
+; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
+; X64-AVX512F-NEXT: movq 72(%rdi), %rax
+; X64-AVX512F-NEXT: movq 64(%rdi), %rcx
+; X64-AVX512F-NEXT: vmovd %ecx, %xmm1
+; X64-AVX512F-NEXT: shrq $32, %rcx
+; X64-AVX512F-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1
+; X64-AVX512F-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
+; X64-AVX512F-NEXT: shrq $32, %rax
+; X64-AVX512F-NEXT: movq 88(%rdi), %rcx
+; X64-AVX512F-NEXT: movq 80(%rdi), %rdx
+; X64-AVX512F-NEXT: vmovd %edx, %xmm2
+; X64-AVX512F-NEXT: shrq $32, %rdx
+; X64-AVX512F-NEXT: vpinsrd $1, %edx, %xmm2, %xmm2
+; X64-AVX512F-NEXT: vpinsrd $2, %ecx, %xmm2, %xmm2
+; X64-AVX512F-NEXT: shrq $32, %rcx
+; X64-AVX512F-NEXT: vpinsrd $3, %ecx, %xmm2, %xmm2
+; X64-AVX512F-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1
+; X64-AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; X64-AVX512F-NEXT: vpcmpeqd {{.*}}(%rip), %zmm0, %k1
+; X64-AVX512F-NEXT: vpcmpeqd {{.*}}(%rip), %zmm1, %k0 {%k1}
+; X64-AVX512F-NEXT: kortestw %k0, %k0
+; X64-AVX512F-NEXT: setb %al
+; X64-AVX512F-NEXT: vzeroupper
+; X64-AVX512F-NEXT: retq
+;
+; X64-AVX512BW-LABEL: length96_eq_const:
+; X64-AVX512BW: # %bb.0:
+; X64-AVX512BW-NEXT: movq 80(%rdi), %rax
+; X64-AVX512BW-NEXT: movq %rax, %rcx
+; X64-AVX512BW-NEXT: vmovd %eax, %xmm0
+; X64-AVX512BW-NEXT: movl %eax, %edx
+; X64-AVX512BW-NEXT: shrl $8, %edx
+; X64-AVX512BW-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
+; X64-AVX512BW-NEXT: movl %eax, %edx
+; X64-AVX512BW-NEXT: shrl $16, %edx
+; X64-AVX512BW-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; X64-AVX512BW-NEXT: movl %eax, %edx
+; X64-AVX512BW-NEXT: shrl $24, %edx
+; X64-AVX512BW-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; X64-AVX512BW-NEXT: movq %rax, %rdx
+; X64-AVX512BW-NEXT: shrq $32, %rdx
+; X64-AVX512BW-NEXT: vpinsrb $4, %edx, %xmm0, %xmm0
+; X64-AVX512BW-NEXT: movq %rax, %rdx
+; X64-AVX512BW-NEXT: shrq $40, %rax
+; X64-AVX512BW-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; X64-AVX512BW-NEXT: movq 88(%rdi), %rax
+; X64-AVX512BW-NEXT: shrq $48, %rdx
+; X64-AVX512BW-NEXT: vpinsrb $6, %edx, %xmm0, %xmm0
+; X64-AVX512BW-NEXT: movq %rax, %rdx
+; X64-AVX512BW-NEXT: shrq $56, %rcx
+; X64-AVX512BW-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
+; X64-AVX512BW-NEXT: movl %eax, %ecx
+; X64-AVX512BW-NEXT: shrl $8, %ecx
+; X64-AVX512BW-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
+; X64-AVX512BW-NEXT: vpinsrb $9, %ecx, %xmm0, %xmm0
+; X64-AVX512BW-NEXT: movl %eax, %ecx
+; X64-AVX512BW-NEXT: shrl $16, %ecx
+; X64-AVX512BW-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
+; X64-AVX512BW-NEXT: movl %eax, %ecx
+; X64-AVX512BW-NEXT: shrl $24, %ecx
+; X64-AVX512BW-NEXT: vpinsrb $11, %ecx, %xmm0, %xmm0
+; X64-AVX512BW-NEXT: movq %rax, %rcx
+; X64-AVX512BW-NEXT: shrq $32, %rcx
+; X64-AVX512BW-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
+; X64-AVX512BW-NEXT: movq %rax, %rcx
+; X64-AVX512BW-NEXT: shrq $40, %rax
+; X64-AVX512BW-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; X64-AVX512BW-NEXT: movq 64(%rdi), %rax
+; X64-AVX512BW-NEXT: shrq $48, %rcx
+; X64-AVX512BW-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; X64-AVX512BW-NEXT: movq %rax, %rcx
+; X64-AVX512BW-NEXT: shrq $56, %rdx
+; X64-AVX512BW-NEXT: vpinsrb $15, %edx, %xmm0, %xmm0
+; X64-AVX512BW-NEXT: movl %eax, %edx
+; X64-AVX512BW-NEXT: shrl $8, %edx
+; X64-AVX512BW-NEXT: vmovd %eax, %xmm1
+; X64-AVX512BW-NEXT: vpinsrb $1, %edx, %xmm1, %xmm1
+; X64-AVX512BW-NEXT: movl %eax, %edx
+; X64-AVX512BW-NEXT: shrl $16, %edx
+; X64-AVX512BW-NEXT: vpinsrb $2, %edx, %xmm1, %xmm1
+; X64-AVX512BW-NEXT: movl %eax, %edx
+; X64-AVX512BW-NEXT: shrl $24, %edx
+; X64-AVX512BW-NEXT: vpinsrb $3, %edx, %xmm1, %xmm1
+; X64-AVX512BW-NEXT: movq %rax, %rdx
+; X64-AVX512BW-NEXT: shrq $32, %rdx
+; X64-AVX512BW-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
+; X64-AVX512BW-NEXT: movq %rax, %rdx
+; X64-AVX512BW-NEXT: shrq $40, %rax
+; X64-AVX512BW-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
+; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm2
+; X64-AVX512BW-NEXT: movq 72(%rdi), %rax
+; X64-AVX512BW-NEXT: shrq $48, %rdx
+; X64-AVX512BW-NEXT: vpinsrb $6, %edx, %xmm1, %xmm1
+; X64-AVX512BW-NEXT: movq %rax, %rdx
+; X64-AVX512BW-NEXT: shrq $56, %rcx
+; X64-AVX512BW-NEXT: vpinsrb $7, %ecx, %xmm1, %xmm1
+; X64-AVX512BW-NEXT: movl %eax, %ecx
+; X64-AVX512BW-NEXT: shrl $8, %ecx
+; X64-AVX512BW-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; X64-AVX512BW-NEXT: vpinsrb $9, %ecx, %xmm1, %xmm1
+; X64-AVX512BW-NEXT: movl %eax, %ecx
+; X64-AVX512BW-NEXT: shrl $16, %ecx
+; X64-AVX512BW-NEXT: vpinsrb $10, %ecx, %xmm1, %xmm1
+; X64-AVX512BW-NEXT: movl %eax, %ecx
+; X64-AVX512BW-NEXT: shrl $24, %ecx
+; X64-AVX512BW-NEXT: vpinsrb $11, %ecx, %xmm1, %xmm1
+; X64-AVX512BW-NEXT: movq %rax, %rcx
+; X64-AVX512BW-NEXT: shrq $32, %rcx
+; X64-AVX512BW-NEXT: vpinsrb $12, %ecx, %xmm1, %xmm1
+; X64-AVX512BW-NEXT: movq %rax, %rcx
+; X64-AVX512BW-NEXT: shrq $40, %rax
+; X64-AVX512BW-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
+; X64-AVX512BW-NEXT: shrq $48, %rcx
+; X64-AVX512BW-NEXT: vpinsrb $14, %ecx, %xmm1, %xmm1
+; X64-AVX512BW-NEXT: shrq $56, %rdx
+; X64-AVX512BW-NEXT: vpinsrb $15, %edx, %xmm1, %xmm1
+; X64-AVX512BW-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
+; X64-AVX512BW-NEXT: vpcmpeqb {{.*}}(%rip), %zmm2, %k1
+; X64-AVX512BW-NEXT: vpcmpeqb {{.*}}(%rip), %zmm0, %k0 {%k1}
+; X64-AVX512BW-NEXT: kortestq %k0, %k0
+; X64-AVX512BW-NEXT: setb %al
+; X64-AVX512BW-NEXT: vzeroupper
+; X64-AVX512BW-NEXT: retq
+ %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 96) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length127(i8* %X, i8* %Y) nounwind {
+; X86-LABEL: length127:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $127
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: retl
+;
+; X64-LABEL: length127:
+; X64: # %bb.0:
+; X64-NEXT: movl $127, %edx
+; X64-NEXT: jmp memcmp # TAILCALL
+ %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 127) nounwind
+ ret i32 %m
+}
+
+define i1 @length127_eq(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length127_eq:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $127
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: setne %al
+; X86-NEXT: retl
+;
+; X64-SSE-LABEL: length127_eq:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: pushq %rax
+; X64-SSE-NEXT: movl $127, %edx
+; X64-SSE-NEXT: callq memcmp
+; X64-SSE-NEXT: testl %eax, %eax
+; X64-SSE-NEXT: setne %al
+; X64-SSE-NEXT: popq %rcx
+; X64-SSE-NEXT: retq
+;
+; X64-AVX1-LABEL: length127_eq:
+; X64-AVX1: # %bb.0:
+; X64-AVX1-NEXT: pushq %rax
+; X64-AVX1-NEXT: movl $127, %edx
+; X64-AVX1-NEXT: callq memcmp
+; X64-AVX1-NEXT: testl %eax, %eax
+; X64-AVX1-NEXT: setne %al
+; X64-AVX1-NEXT: popq %rcx
+; X64-AVX1-NEXT: retq
+;
+; X64-AVX2-LABEL: length127_eq:
+; X64-AVX2: # %bb.0:
+; X64-AVX2-NEXT: pushq %rax
+; X64-AVX2-NEXT: movl $127, %edx
+; X64-AVX2-NEXT: callq memcmp
+; X64-AVX2-NEXT: testl %eax, %eax
+; X64-AVX2-NEXT: setne %al
+; X64-AVX2-NEXT: popq %rcx
+; X64-AVX2-NEXT: retq
+;
+; X64-AVX512F-LABEL: length127_eq:
+; X64-AVX512F: # %bb.0:
+; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
+; X64-AVX512F-NEXT: vmovdqu64 63(%rdi), %zmm1
+; X64-AVX512F-NEXT: vpcmpeqd (%rsi), %zmm0, %k1
+; X64-AVX512F-NEXT: vpcmpeqd 63(%rsi), %zmm1, %k0 {%k1}
+; X64-AVX512F-NEXT: kortestw %k0, %k0
+; X64-AVX512F-NEXT: setae %al
+; X64-AVX512F-NEXT: vzeroupper
+; X64-AVX512F-NEXT: retq
+;
+; X64-AVX512BW-LABEL: length127_eq:
+; X64-AVX512BW: # %bb.0:
+; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0
+; X64-AVX512BW-NEXT: vmovdqu64 63(%rdi), %zmm1
+; X64-AVX512BW-NEXT: vpcmpeqb (%rsi), %zmm0, %k1
+; X64-AVX512BW-NEXT: vpcmpeqb 63(%rsi), %zmm1, %k0 {%k1}
+; X64-AVX512BW-NEXT: kortestq %k0, %k0
+; X64-AVX512BW-NEXT: setae %al
+; X64-AVX512BW-NEXT: vzeroupper
+; X64-AVX512BW-NEXT: retq
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 127) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length127_lt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length127_lt:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $127
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: shrl $31, %eax
+; X86-NEXT: # kill: def $al killed $al killed $eax
+; X86-NEXT: retl
+;
+; X64-LABEL: length127_lt:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: movl $127, %edx
+; X64-NEXT: callq memcmp
+; X64-NEXT: shrl $31, %eax
+; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 127) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length127_gt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length127_gt:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $127
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: setg %al
+; X86-NEXT: retl
+;
+; X64-LABEL: length127_gt:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: movl $127, %edx
+; X64-NEXT: callq memcmp
+; X64-NEXT: testl %eax, %eax
+; X64-NEXT: setg %al
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 127) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length127_eq_const(i8* %X) nounwind {
+; X86-LABEL: length127_eq_const:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $127
+; X86-NEXT: pushl $.L.str
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: sete %al
+; X86-NEXT: retl
+;
+; X64-SSE-LABEL: length127_eq_const:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: pushq %rax
+; X64-SSE-NEXT: movl $.L.str, %esi
+; X64-SSE-NEXT: movl $127, %edx
+; X64-SSE-NEXT: callq memcmp
+; X64-SSE-NEXT: testl %eax, %eax
+; X64-SSE-NEXT: sete %al
+; X64-SSE-NEXT: popq %rcx
+; X64-SSE-NEXT: retq
+;
+; X64-AVX1-LABEL: length127_eq_const:
+; X64-AVX1: # %bb.0:
+; X64-AVX1-NEXT: pushq %rax
+; X64-AVX1-NEXT: movl $.L.str, %esi
+; X64-AVX1-NEXT: movl $127, %edx
+; X64-AVX1-NEXT: callq memcmp
+; X64-AVX1-NEXT: testl %eax, %eax
+; X64-AVX1-NEXT: sete %al
+; X64-AVX1-NEXT: popq %rcx
+; X64-AVX1-NEXT: retq
+;
+; X64-AVX2-LABEL: length127_eq_const:
+; X64-AVX2: # %bb.0:
+; X64-AVX2-NEXT: pushq %rax
+; X64-AVX2-NEXT: movl $.L.str, %esi
+; X64-AVX2-NEXT: movl $127, %edx
+; X64-AVX2-NEXT: callq memcmp
+; X64-AVX2-NEXT: testl %eax, %eax
+; X64-AVX2-NEXT: sete %al
+; X64-AVX2-NEXT: popq %rcx
+; X64-AVX2-NEXT: retq
+;
+; X64-AVX512F-LABEL: length127_eq_const:
+; X64-AVX512F: # %bb.0:
+; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
+; X64-AVX512F-NEXT: vmovdqu64 63(%rdi), %zmm1
+; X64-AVX512F-NEXT: vpcmpeqd {{.*}}(%rip), %zmm0, %k1
+; X64-AVX512F-NEXT: vpcmpeqd .L.str+{{.*}}(%rip), %zmm1, %k0 {%k1}
+; X64-AVX512F-NEXT: kortestw %k0, %k0
+; X64-AVX512F-NEXT: setb %al
+; X64-AVX512F-NEXT: vzeroupper
+; X64-AVX512F-NEXT: retq
+;
+; X64-AVX512BW-LABEL: length127_eq_const:
+; X64-AVX512BW: # %bb.0:
+; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0
+; X64-AVX512BW-NEXT: vmovdqu64 63(%rdi), %zmm1
+; X64-AVX512BW-NEXT: vpcmpeqb {{.*}}(%rip), %zmm0, %k1
+; X64-AVX512BW-NEXT: vpcmpeqb .L.str+{{.*}}(%rip), %zmm1, %k0 {%k1}
+; X64-AVX512BW-NEXT: kortestq %k0, %k0
+; X64-AVX512BW-NEXT: setb %al
+; X64-AVX512BW-NEXT: vzeroupper
+; X64-AVX512BW-NEXT: retq
+ %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 127) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length128(i8* %X, i8* %Y) nounwind {
+; X86-LABEL: length128:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $128
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: retl
+;
+; X64-LABEL: length128:
+; X64: # %bb.0:
+; X64-NEXT: movl $128, %edx
+; X64-NEXT: jmp memcmp # TAILCALL
+ %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 128) nounwind
+ ret i32 %m
+}
+
+define i1 @length128_eq(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length128_eq:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $128
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: setne %al
+; X86-NEXT: retl
+;
+; X64-SSE-LABEL: length128_eq:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: pushq %rax
+; X64-SSE-NEXT: movl $128, %edx
+; X64-SSE-NEXT: callq memcmp
+; X64-SSE-NEXT: testl %eax, %eax
+; X64-SSE-NEXT: setne %al
+; X64-SSE-NEXT: popq %rcx
+; X64-SSE-NEXT: retq
+;
+; X64-AVX1-LABEL: length128_eq:
+; X64-AVX1: # %bb.0:
+; X64-AVX1-NEXT: pushq %rax
+; X64-AVX1-NEXT: movl $128, %edx
+; X64-AVX1-NEXT: callq memcmp
+; X64-AVX1-NEXT: testl %eax, %eax
+; X64-AVX1-NEXT: setne %al
+; X64-AVX1-NEXT: popq %rcx
+; X64-AVX1-NEXT: retq
+;
+; X64-AVX2-LABEL: length128_eq:
+; X64-AVX2: # %bb.0:
+; X64-AVX2-NEXT: pushq %rax
+; X64-AVX2-NEXT: movl $128, %edx
+; X64-AVX2-NEXT: callq memcmp
+; X64-AVX2-NEXT: testl %eax, %eax
+; X64-AVX2-NEXT: setne %al
+; X64-AVX2-NEXT: popq %rcx
+; X64-AVX2-NEXT: retq
+;
+; X64-AVX512F-LABEL: length128_eq:
+; X64-AVX512F: # %bb.0:
+; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
+; X64-AVX512F-NEXT: vmovdqu64 64(%rdi), %zmm1
+; X64-AVX512F-NEXT: vpcmpeqd (%rsi), %zmm0, %k1
+; X64-AVX512F-NEXT: vpcmpeqd 64(%rsi), %zmm1, %k0 {%k1}
+; X64-AVX512F-NEXT: kortestw %k0, %k0
+; X64-AVX512F-NEXT: setae %al
+; X64-AVX512F-NEXT: vzeroupper
+; X64-AVX512F-NEXT: retq
+;
+; X64-AVX512BW-LABEL: length128_eq:
+; X64-AVX512BW: # %bb.0:
+; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0
+; X64-AVX512BW-NEXT: vmovdqu64 64(%rdi), %zmm1
+; X64-AVX512BW-NEXT: vpcmpeqb (%rsi), %zmm0, %k1
+; X64-AVX512BW-NEXT: vpcmpeqb 64(%rsi), %zmm1, %k0 {%k1}
+; X64-AVX512BW-NEXT: kortestq %k0, %k0
+; X64-AVX512BW-NEXT: setae %al
+; X64-AVX512BW-NEXT: vzeroupper
+; X64-AVX512BW-NEXT: retq
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 128) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
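The 127- and 128-byte AVX-512 checks above share one pattern: two 64-byte loads (overlapping at offset 63 in the 127 case), a first vpcmpeq that produces a mask, a second compare executed under that mask ({%k1}), and a single kortest; CF is set only when every element matched, hence setae for the ne tests and setb for eq. In C, assuming AVX512BW intrinsics (a sketch of the dataflow, not the committed checks):

  #include <immintrin.h>
  #include <stdbool.h>

  /* 128 bytes as two 64-byte compares chained through a mask register. */
  static bool memeq128(const unsigned char *x, const unsigned char *y) {
    __mmask64 k0 = _mm512_cmpeq_epi8_mask(_mm512_loadu_si512(x),
                                          _mm512_loadu_si512(y));
    __mmask64 k  = _mm512_mask_cmpeq_epi8_mask(k0,
                                               _mm512_loadu_si512(x + 64),
                                               _mm512_loadu_si512(y + 64));
    return k == ~(__mmask64)0; /* kortest CF: all 128 byte pairs equal */
  }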
+define i1 @length128_lt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length128_lt:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $128
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: shrl $31, %eax
+; X86-NEXT: # kill: def $al killed $al killed $eax
+; X86-NEXT: retl
+;
+; X64-LABEL: length128_lt:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: movl $128, %edx
+; X64-NEXT: callq memcmp
+; X64-NEXT: shrl $31, %eax
+; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 128) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length128_gt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length128_gt:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $128
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: setg %al
+; X86-NEXT: retl
+;
+; X64-LABEL: length128_gt:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: movl $128, %edx
+; X64-NEXT: callq memcmp
+; X64-NEXT: testl %eax, %eax
+; X64-NEXT: setg %al
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 128) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length128_eq_const(i8* %X) nounwind {
+; X86-LABEL: length128_eq_const:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $128
+; X86-NEXT: pushl $.L.str
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: sete %al
+; X86-NEXT: retl
+;
+; X64-SSE-LABEL: length128_eq_const:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: pushq %rax
+; X64-SSE-NEXT: movl $.L.str, %esi
+; X64-SSE-NEXT: movl $128, %edx
+; X64-SSE-NEXT: callq memcmp
+; X64-SSE-NEXT: testl %eax, %eax
+; X64-SSE-NEXT: sete %al
+; X64-SSE-NEXT: popq %rcx
+; X64-SSE-NEXT: retq
+;
+; X64-AVX1-LABEL: length128_eq_const:
+; X64-AVX1: # %bb.0:
+; X64-AVX1-NEXT: pushq %rax
+; X64-AVX1-NEXT: movl $.L.str, %esi
+; X64-AVX1-NEXT: movl $128, %edx
+; X64-AVX1-NEXT: callq memcmp
+; X64-AVX1-NEXT: testl %eax, %eax
+; X64-AVX1-NEXT: sete %al
+; X64-AVX1-NEXT: popq %rcx
+; X64-AVX1-NEXT: retq
+;
+; X64-AVX2-LABEL: length128_eq_const:
+; X64-AVX2: # %bb.0:
+; X64-AVX2-NEXT: pushq %rax
+; X64-AVX2-NEXT: movl $.L.str, %esi
+; X64-AVX2-NEXT: movl $128, %edx
+; X64-AVX2-NEXT: callq memcmp
+; X64-AVX2-NEXT: testl %eax, %eax
+; X64-AVX2-NEXT: sete %al
+; X64-AVX2-NEXT: popq %rcx
+; X64-AVX2-NEXT: retq
+;
+; X64-AVX512F-LABEL: length128_eq_const:
+; X64-AVX512F: # %bb.0:
+; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
+; X64-AVX512F-NEXT: vmovdqu64 64(%rdi), %zmm1
+; X64-AVX512F-NEXT: vpcmpeqd {{.*}}(%rip), %zmm0, %k1
+; X64-AVX512F-NEXT: vpcmpeqd .L.str+{{.*}}(%rip), %zmm1, %k0 {%k1}
+; X64-AVX512F-NEXT: kortestw %k0, %k0
+; X64-AVX512F-NEXT: setb %al
+; X64-AVX512F-NEXT: vzeroupper
+; X64-AVX512F-NEXT: retq
+;
+; X64-AVX512BW-LABEL: length128_eq_const:
+; X64-AVX512BW: # %bb.0:
+; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0
+; X64-AVX512BW-NEXT: vmovdqu64 64(%rdi), %zmm1
+; X64-AVX512BW-NEXT: vpcmpeqb {{.*}}(%rip), %zmm0, %k1
+; X64-AVX512BW-NEXT: vpcmpeqb .L.str+{{.*}}(%rip), %zmm1, %k0 {%k1}
+; X64-AVX512BW-NEXT: kortestq %k0, %k0
+; X64-AVX512BW-NEXT: setb %al
+; X64-AVX512BW-NEXT: vzeroupper
+; X64-AVX512BW-NEXT: retq
+ %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 128) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length192(i8* %X, i8* %Y) nounwind {
+; X86-LABEL: length192:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $192
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: retl
+;
+; X64-LABEL: length192:
+; X64: # %bb.0:
+; X64-NEXT: movl $192, %edx
+; X64-NEXT: jmp memcmp # TAILCALL
+ %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 192) nounwind
+ ret i32 %m
+}
+
+define i1 @length192_eq(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length192_eq:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $192
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: setne %al
+; X86-NEXT: retl
+;
+; X64-LABEL: length192_eq:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: movl $192, %edx
+; X64-NEXT: callq memcmp
+; X64-NEXT: testl %eax, %eax
+; X64-NEXT: setne %al
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 192) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length192_lt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length192_lt:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $192
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: shrl $31, %eax
+; X86-NEXT: # kill: def $al killed $al killed $eax
+; X86-NEXT: retl
+;
+; X64-LABEL: length192_lt:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: movl $192, %edx
+; X64-NEXT: callq memcmp
+; X64-NEXT: shrl $31, %eax
+; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 192) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length192_gt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length192_gt:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $192
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: setg %al
+; X86-NEXT: retl
+;
+; X64-LABEL: length192_gt:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: movl $192, %edx
+; X64-NEXT: callq memcmp
+; X64-NEXT: testl %eax, %eax
+; X64-NEXT: setg %al
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 192) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length192_eq_const(i8* %X) nounwind {
+; X86-LABEL: length192_eq_const:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $192
+; X86-NEXT: pushl $.L.str
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: sete %al
+; X86-NEXT: retl
+;
+; X64-LABEL: length192_eq_const:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: movl $.L.str, %esi
+; X64-NEXT: movl $192, %edx
+; X64-NEXT: callq memcmp
+; X64-NEXT: testl %eax, %eax
+; X64-NEXT: sete %al
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 192) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length255(i8* %X, i8* %Y) nounwind {
+; X86-LABEL: length255:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $255
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: retl
+;
+; X64-LABEL: length255:
+; X64: # %bb.0:
+; X64-NEXT: movl $255, %edx
+; X64-NEXT: jmp memcmp # TAILCALL
+ %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 255) nounwind
+ ret i32 %m
+}
+
+define i1 @length255_eq(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length255_eq:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $255
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: setne %al
+; X86-NEXT: retl
+;
+; X64-LABEL: length255_eq:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: movl $255, %edx
+; X64-NEXT: callq memcmp
+; X64-NEXT: testl %eax, %eax
+; X64-NEXT: setne %al
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 255) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length255_lt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length255_lt:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $255
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: shrl $31, %eax
+; X86-NEXT: # kill: def $al killed $al killed $eax
+; X86-NEXT: retl
+;
+; X64-LABEL: length255_lt:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: movl $255, %edx
+; X64-NEXT: callq memcmp
+; X64-NEXT: shrl $31, %eax
+; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 255) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length255_gt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length255_gt:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $255
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: setg %al
+; X86-NEXT: retl
+;
+; X64-LABEL: length255_gt:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: movl $255, %edx
+; X64-NEXT: callq memcmp
+; X64-NEXT: testl %eax, %eax
+; X64-NEXT: setg %al
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 255) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length255_eq_const(i8* %X) nounwind {
+; X86-LABEL: length255_eq_const:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $255
+; X86-NEXT: pushl $.L.str
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: sete %al
+; X86-NEXT: retl
+;
+; X64-LABEL: length255_eq_const:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: movl $.L.str, %esi
+; X64-NEXT: movl $255, %edx
+; X64-NEXT: callq memcmp
+; X64-NEXT: testl %eax, %eax
+; X64-NEXT: sete %al
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 255) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length256(i8* %X, i8* %Y) nounwind {
+; X86-LABEL: length256:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $256 # imm = 0x100
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: retl
+;
+; X64-LABEL: length256:
+; X64: # %bb.0:
+; X64-NEXT: movl $256, %edx # imm = 0x100
+; X64-NEXT: jmp memcmp # TAILCALL
+ %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 256) nounwind
+ ret i32 %m
+}
+
+define i1 @length256_eq(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length256_eq:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $256 # imm = 0x100
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: setne %al
+; X86-NEXT: retl
+;
+; X64-LABEL: length256_eq:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: movl $256, %edx # imm = 0x100
+; X64-NEXT: callq memcmp
+; X64-NEXT: testl %eax, %eax
+; X64-NEXT: setne %al
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 256) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length256_lt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length256_lt:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $256 # imm = 0x100
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: shrl $31, %eax
+; X86-NEXT: # kill: def $al killed $al killed $eax
+; X86-NEXT: retl
+;
+; X64-LABEL: length256_lt:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: movl $256, %edx # imm = 0x100
+; X64-NEXT: callq memcmp
+; X64-NEXT: shrl $31, %eax
+; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 256) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length256_gt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length256_gt:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $256 # imm = 0x100
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: setg %al
+; X86-NEXT: retl
+;
+; X64-LABEL: length256_gt:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: movl $256, %edx # imm = 0x100
+; X64-NEXT: callq memcmp
+; X64-NEXT: testl %eax, %eax
+; X64-NEXT: setg %al
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 256) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length256_eq_const(i8* %X) nounwind {
+; X86-LABEL: length256_eq_const:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $256 # imm = 0x100
+; X86-NEXT: pushl $.L.str
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: sete %al
+; X86-NEXT: retl
+;
+; X64-LABEL: length256_eq_const:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: movl $.L.str, %esi
+; X64-NEXT: movl $256, %edx # imm = 0x100
+; X64-NEXT: callq memcmp
+; X64-NEXT: testl %eax, %eax
+; X64-NEXT: sete %al
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 256) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length384(i8* %X, i8* %Y) nounwind {
+; X86-LABEL: length384:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $384 # imm = 0x180
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: retl
+;
+; X64-LABEL: length384:
+; X64: # %bb.0:
+; X64-NEXT: movl $384, %edx # imm = 0x180
+; X64-NEXT: jmp memcmp # TAILCALL
+ %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 384) nounwind
+ ret i32 %m
+}
+
+define i1 @length384_eq(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length384_eq:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $384 # imm = 0x180
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: setne %al
+; X86-NEXT: retl
+;
+; X64-LABEL: length384_eq:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: movl $384, %edx # imm = 0x180
+; X64-NEXT: callq memcmp
+; X64-NEXT: testl %eax, %eax
+; X64-NEXT: setne %al
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 384) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length384_lt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length384_lt:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $384 # imm = 0x180
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: shrl $31, %eax
+; X86-NEXT: # kill: def $al killed $al killed $eax
+; X86-NEXT: retl
+;
+; X64-LABEL: length384_lt:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: movl $384, %edx # imm = 0x180
+; X64-NEXT: callq memcmp
+; X64-NEXT: shrl $31, %eax
+; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 384) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length384_gt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length384_gt:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $384 # imm = 0x180
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: setg %al
+; X86-NEXT: retl
+;
+; X64-LABEL: length384_gt:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: movl $384, %edx # imm = 0x180
+; X64-NEXT: callq memcmp
+; X64-NEXT: testl %eax, %eax
+; X64-NEXT: setg %al
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 384) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length384_eq_const(i8* %X) nounwind {
+; X86-LABEL: length384_eq_const:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $384 # imm = 0x180
+; X86-NEXT: pushl $.L.str
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: sete %al
+; X86-NEXT: retl
+;
+; X64-LABEL: length384_eq_const:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: movl $.L.str, %esi
+; X64-NEXT: movl $384, %edx # imm = 0x180
+; X64-NEXT: callq memcmp
+; X64-NEXT: testl %eax, %eax
+; X64-NEXT: sete %al
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 384) nounwind
+ %c = icmp eq i32 %m, 0
ret i1 %c
}
-define i32 @length64(i8* %X, i8* %Y) nounwind {
-; X86-LABEL: length64:
+define i32 @length511(i8* %X, i8* %Y) nounwind {
+; X86-LABEL: length511:
; X86: # %bb.0:
; X86-NEXT: pushl $0
-; X86-NEXT: pushl $64
+; X86-NEXT: pushl $511 # imm = 0x1FF
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp
; X86-NEXT: retl
;
-; X64-LABEL: length64:
+; X64-LABEL: length511:
; X64: # %bb.0:
-; X64-NEXT: movl $64, %edx
+; X64-NEXT: movl $511, %edx # imm = 0x1FF
; X64-NEXT: jmp memcmp # TAILCALL
- %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 64) nounwind
+ %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 511) nounwind
ret i32 %m
}
-define i1 @length64_eq(i8* %x, i8* %y) nounwind {
-; X86-LABEL: length64_eq:
+define i1 @length511_eq(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length511_eq:
; X86: # %bb.0:
; X86-NEXT: pushl $0
-; X86-NEXT: pushl $64
+; X86-NEXT: pushl $511 # imm = 0x1FF
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
@@ -1569,65 +4619,79 @@ define i1 @length64_eq(i8* %x, i8* %y) nounwind {
; X86-NEXT: setne %al
; X86-NEXT: retl
;
-; X64-SSE2-LABEL: length64_eq:
-; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pushq %rax
-; X64-SSE2-NEXT: movl $64, %edx
-; X64-SSE2-NEXT: callq memcmp
-; X64-SSE2-NEXT: testl %eax, %eax
-; X64-SSE2-NEXT: setne %al
-; X64-SSE2-NEXT: popq %rcx
-; X64-SSE2-NEXT: retq
-;
-; X64-AVX1-LABEL: length64_eq:
-; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: pushq %rax
-; X64-AVX1-NEXT: movl $64, %edx
-; X64-AVX1-NEXT: callq memcmp
-; X64-AVX1-NEXT: testl %eax, %eax
-; X64-AVX1-NEXT: setne %al
-; X64-AVX1-NEXT: popq %rcx
-; X64-AVX1-NEXT: retq
-;
-; X64-AVX2-LABEL: length64_eq:
-; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
-; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1
-; X64-AVX2-NEXT: vpxor 32(%rsi), %ymm1, %ymm1
-; X64-AVX2-NEXT: vpxor (%rsi), %ymm0, %ymm0
-; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vptest %ymm0, %ymm0
-; X64-AVX2-NEXT: setne %al
-; X64-AVX2-NEXT: vzeroupper
-; X64-AVX2-NEXT: retq
+; X64-LABEL: length511_eq:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: movl $511, %edx # imm = 0x1FF
+; X64-NEXT: callq memcmp
+; X64-NEXT: testl %eax, %eax
+; X64-NEXT: setne %al
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 511) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length511_lt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length511_lt:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $511 # imm = 0x1FF
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: shrl $31, %eax
+; X86-NEXT: # kill: def $al killed $al killed $eax
+; X86-NEXT: retl
;
-; X64-AVX512F-LABEL: length64_eq:
-; X64-AVX512F: # %bb.0:
-; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-AVX512F-NEXT: vpcmpeqd (%rsi), %zmm0, %k0
-; X64-AVX512F-NEXT: kortestw %k0, %k0
-; X64-AVX512F-NEXT: setae %al
-; X64-AVX512F-NEXT: vzeroupper
-; X64-AVX512F-NEXT: retq
+; X64-LABEL: length511_lt:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: movl $511, %edx # imm = 0x1FF
+; X64-NEXT: callq memcmp
+; X64-NEXT: shrl $31, %eax
+; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 511) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length511_gt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length511_gt:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $511 # imm = 0x1FF
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: setg %al
+; X86-NEXT: retl
;
-; X64-AVX512BW-LABEL: length64_eq:
-; X64-AVX512BW: # %bb.0:
-; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-AVX512BW-NEXT: vpcmpeqb (%rsi), %zmm0, %k0
-; X64-AVX512BW-NEXT: kortestq %k0, %k0
-; X64-AVX512BW-NEXT: setae %al
-; X64-AVX512BW-NEXT: vzeroupper
-; X64-AVX512BW-NEXT: retq
- %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 64) nounwind
- %cmp = icmp ne i32 %call, 0
+; X64-LABEL: length511_gt:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: movl $511, %edx # imm = 0x1FF
+; X64-NEXT: callq memcmp
+; X64-NEXT: testl %eax, %eax
+; X64-NEXT: setg %al
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 511) nounwind
+ %cmp = icmp sgt i32 %call, 0
ret i1 %cmp
}
-define i1 @length64_eq_const(i8* %X) nounwind {
-; X86-LABEL: length64_eq_const:
+define i1 @length511_eq_const(i8* %X) nounwind {
+; X86-LABEL: length511_eq_const:
; X86: # %bb.0:
; X86-NEXT: pushl $0
-; X86-NEXT: pushl $64
+; X86-NEXT: pushl $511 # imm = 0x1FF
; X86-NEXT: pushl $.L.str
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp
@@ -1636,58 +4700,145 @@ define i1 @length64_eq_const(i8* %X) nounwind {
; X86-NEXT: sete %al
; X86-NEXT: retl
;
-; X64-SSE2-LABEL: length64_eq_const:
-; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pushq %rax
-; X64-SSE2-NEXT: movl $.L.str, %esi
-; X64-SSE2-NEXT: movl $64, %edx
-; X64-SSE2-NEXT: callq memcmp
-; X64-SSE2-NEXT: testl %eax, %eax
-; X64-SSE2-NEXT: sete %al
-; X64-SSE2-NEXT: popq %rcx
-; X64-SSE2-NEXT: retq
+; X64-LABEL: length511_eq_const:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: movl $.L.str, %esi
+; X64-NEXT: movl $511, %edx # imm = 0x1FF
+; X64-NEXT: callq memcmp
+; X64-NEXT: testl %eax, %eax
+; X64-NEXT: sete %al
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 511) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length512(i8* %X, i8* %Y) nounwind {
+; X86-LABEL: length512:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $512 # imm = 0x200
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: retl
;
-; X64-AVX1-LABEL: length64_eq_const:
-; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: pushq %rax
-; X64-AVX1-NEXT: movl $.L.str, %esi
-; X64-AVX1-NEXT: movl $64, %edx
-; X64-AVX1-NEXT: callq memcmp
-; X64-AVX1-NEXT: testl %eax, %eax
-; X64-AVX1-NEXT: sete %al
-; X64-AVX1-NEXT: popq %rcx
-; X64-AVX1-NEXT: retq
+; X64-LABEL: length512:
+; X64: # %bb.0:
+; X64-NEXT: movl $512, %edx # imm = 0x200
+; X64-NEXT: jmp memcmp # TAILCALL
+ %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 512) nounwind
+ ret i32 %m
+}
+
+define i1 @length512_eq(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length512_eq:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $512 # imm = 0x200
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: setne %al
+; X86-NEXT: retl
;
-; X64-AVX2-LABEL: length64_eq_const:
-; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
-; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1
-; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %ymm1, %ymm1
-; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %ymm0, %ymm0
-; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vptest %ymm0, %ymm0
-; X64-AVX2-NEXT: sete %al
-; X64-AVX2-NEXT: vzeroupper
-; X64-AVX2-NEXT: retq
+; X64-LABEL: length512_eq:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: movl $512, %edx # imm = 0x200
+; X64-NEXT: callq memcmp
+; X64-NEXT: testl %eax, %eax
+; X64-NEXT: setne %al
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 512) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length512_lt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length512_lt:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $512 # imm = 0x200
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: shrl $31, %eax
+; X86-NEXT: # kill: def $al killed $al killed $eax
+; X86-NEXT: retl
;
-; X64-AVX512F-LABEL: length64_eq_const:
-; X64-AVX512F: # %bb.0:
-; X64-AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-AVX512F-NEXT: vpcmpeqd {{.*}}(%rip), %zmm0, %k0
-; X64-AVX512F-NEXT: kortestw %k0, %k0
-; X64-AVX512F-NEXT: setb %al
-; X64-AVX512F-NEXT: vzeroupper
-; X64-AVX512F-NEXT: retq
+; X64-LABEL: length512_lt:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: movl $512, %edx # imm = 0x200
+; X64-NEXT: callq memcmp
+; X64-NEXT: shrl $31, %eax
+; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 512) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length512_gt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length512_gt:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $512 # imm = 0x200
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: setg %al
+; X86-NEXT: retl
;
-; X64-AVX512BW-LABEL: length64_eq_const:
-; X64-AVX512BW: # %bb.0:
-; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0
-; X64-AVX512BW-NEXT: vpcmpeqb {{.*}}(%rip), %zmm0, %k0
-; X64-AVX512BW-NEXT: kortestq %k0, %k0
-; X64-AVX512BW-NEXT: setb %al
-; X64-AVX512BW-NEXT: vzeroupper
-; X64-AVX512BW-NEXT: retq
- %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 64) nounwind
+; X64-LABEL: length512_gt:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: movl $512, %edx # imm = 0x200
+; X64-NEXT: callq memcmp
+; X64-NEXT: testl %eax, %eax
+; X64-NEXT: setg %al
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 512) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length512_eq_const(i8* %X) nounwind {
+; X86-LABEL: length512_eq_const:
+; X86: # %bb.0:
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $512 # imm = 0x200
+; X86-NEXT: pushl $.L.str
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: sete %al
+; X86-NEXT: retl
+;
+; X64-LABEL: length512_eq_const:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: movl $.L.str, %esi
+; X64-NEXT: movl $512, %edx # imm = 0x200
+; X64-NEXT: callq memcmp
+; X64-NEXT: testl %eax, %eax
+; X64-NEXT: sete %al
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 512) nounwind
%c = icmp eq i32 %m, 0
ret i1 %c
}
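
For reference, each of the new lt/gt tests above follows the same shape: call
memcmp, then branch on the sign of the i32 result. A rough C-level sketch of
what the length512_lt/length512_gt pair exercises (hypothetical helper names,
not part of this patch):

    #include <stdbool.h>
    #include <string.h>

    /* Roughly corresponds to the length512_lt pattern above: a memcmp
       libcall followed by "shrl $31, %eax" to extract the sign bit. */
    bool lt512(const void *x, const void *y) {
        return memcmp(x, y, 512) < 0;
    }

    /* Roughly corresponds to the length512_gt pattern above: a memcmp
       libcall followed by "testl %eax, %eax" + "setg %al". */
    bool gt512(const void *x, const void *y) {
        return memcmp(x, y, 512) > 0;
    }

At these sizes every run line is past the inline-expansion threshold, so all
configurations currently emit a libcall (callq memcmp, or jmp memcmp for the
tail-call variants) rather than inline vector loads, which is exactly what
the autogenerated checks record.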