[llvm] 0d05093 - [X86] NFC: expand inline memcmp test coverage

David Zarzycki via llvm-commits llvm-commits at lists.llvm.org
Sat Oct 26 11:21:44 PDT 2019


Author: David Zarzycki
Date: 2019-10-26T21:14:57+03:00
New Revision: 0d0509384f054cb4f13260786ee48163ac94d123

URL: https://github.com/llvm/llvm-project/commit/0d0509384f054cb4f13260786ee48163ac94d123
DIFF: https://github.com/llvm/llvm-project/commit/0d0509384f054cb4f13260786ee48163ac94d123.diff

LOG: [X86] NFC: expand inline memcmp test coverage

1) Adds SSE4.1 coverage.
2) Adds coverage both with and without the prefer-256-bit subtarget feature.
3) Adds more power-of-two tests up to 512 bytes.
4) Adds power-of-two-minus-one tests to verify overlapping loads (see the sketch after this list).
5) Adds power-of-two-plus-one-half tests (48, 96, 192, and 384 bytes).
6) Adds greater-than/less-than tests from 16 to 512 bytes.
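
For reference, item 4 exercises overlapping loads: for a size such as 31
bytes, the backend can emit two 16-byte loads at offsets 0 and 15
(overlapping by one byte) instead of a libcall. A minimal sketch of such a
test, mirroring the length31_eq function added in the diff below (the
function name here is illustrative):

define i1 @length31_eq_sketch(i8* %x, i8* %y) nounwind {
  ; With SSE2 or later this lowers to movdqu (%rdi) / movdqu 15(%rdi)
  ; plus a vector compare, i.e. two overlapping 16-byte loads and no
  ; call to memcmp.
  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 31) nounwind
  %cmp = icmp eq i32 %call, 0
  ret i1 %cmp
}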

https://reviews.llvm.org/D69222

Added: 
    

Modified: 
    llvm/test/CodeGen/X86/memcmp.ll

Removed: 
    


################################################################################
diff --git a/llvm/test/CodeGen/X86/memcmp.ll b/llvm/test/CodeGen/X86/memcmp.ll
index 97116d991c10..1493879649ef 100644
--- a/llvm/test/CodeGen/X86/memcmp.ll
+++ b/llvm/test/CodeGen/X86/memcmp.ll
@@ -1,17 +1,21 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=cmov   | FileCheck %s --check-prefixes=X86,X86-NOSSE
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse   | FileCheck %s --check-prefixes=X86,SSE,X86-SSE1
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2  | FileCheck %s --check-prefixes=X86,SSE,X86-SSE2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown             | FileCheck %s --check-prefixes=X64,X64-SSE2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx  | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX1
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X64,X64-AVX512,X64-AVX512F
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw | FileCheck %s --check-prefixes=X64,X64-AVX512,X64-AVX512BW
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=cmov     | FileCheck %s --check-prefixes=X86,X86-NOSSE
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse     | FileCheck %s --check-prefixes=X86,X86-SSE,X86-SSE1
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2    | FileCheck %s --check-prefixes=X86,X86-SSE,X86-SSE2
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.1  | FileCheck %s --check-prefixes=X86,X86-SSE,X86-SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown               | FileCheck %s --check-prefixes=X64,X64-SSE,X64-SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse4.1 | FileCheck %s --check-prefixes=X64,X64-SSE,X64-SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx    | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2   | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f,+prefer-256-bit  | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw,+prefer-256-bit | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f,-prefer-256-bit  | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX512,X64-AVX512F
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw,-prefer-256-bit | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX512,X64-AVX512BW
 
 ; This tests codegen time inlining/optimization of memcmp
 ; rdar://6480398
 
-@.str = private constant [65 x i8] c"0123456789012345678901234567890123456789012345678901234567890123\00", align 1
+@.str = private constant [513 x i8] c"01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901\00", align 1
 
 declare i32 @memcmp(i8*, i8*, i64)
 
@@ -189,7 +193,7 @@ define i1 @length2_eq_const(i8* %X) nounwind {
 ; X64-NEXT:    cmpl $12849, %eax # imm = 0x3231
 ; X64-NEXT:    setne %al
 ; X64-NEXT:    retq
-  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 1), i64 2) nounwind
+  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 1), i64 2) nounwind
   %c = icmp ne i32 %m, 0
   ret i1 %c
 }
@@ -431,7 +435,7 @@ define i1 @length4_eq_const(i8* %X) nounwind {
 ; X64-NEXT:    cmpl $875770417, (%rdi) # imm = 0x34333231
 ; X64-NEXT:    sete %al
 ; X64-NEXT:    retq
-  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 1), i64 4) nounwind
+  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 1), i64 4) nounwind
   %c = icmp eq i32 %m, 0
   ret i1 %c
 }
@@ -679,7 +683,7 @@ define i1 @length8_eq_const(i8* %X) nounwind {
 ; X64-NEXT:    cmpq %rax, (%rdi)
 ; X64-NEXT:    setne %al
 ; X64-NEXT:    retq
-  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 8) nounwind
+  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 8) nounwind
   %c = icmp ne i32 %m, 0
   ret i1 %c
 }
@@ -990,6 +994,17 @@ define i1 @length16_eq(i8* %x, i8* %y) nounwind {
 ; X86-SSE2-NEXT:    setne %al
 ; X86-SSE2-NEXT:    retl
 ;
+; X86-SSE41-LABEL: length16_eq:
+; X86-SSE41:       # %bb.0:
+; X86-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE41-NEXT:    movdqu (%ecx), %xmm0
+; X86-SSE41-NEXT:    movdqu (%eax), %xmm1
+; X86-SSE41-NEXT:    pxor %xmm0, %xmm1
+; X86-SSE41-NEXT:    ptest %xmm1, %xmm1
+; X86-SSE41-NEXT:    setne %al
+; X86-SSE41-NEXT:    retl
+;
 ; X64-SSE2-LABEL: length16_eq:
 ; X64-SSE2:       # %bb.0:
 ; X64-SSE2-NEXT:    movdqu (%rdi), %xmm0
@@ -1000,6 +1015,15 @@ define i1 @length16_eq(i8* %x, i8* %y) nounwind {
 ; X64-SSE2-NEXT:    setne %al
 ; X64-SSE2-NEXT:    retq
 ;
+; X64-SSE41-LABEL: length16_eq:
+; X64-SSE41:       # %bb.0:
+; X64-SSE41-NEXT:    movdqu (%rdi), %xmm0
+; X64-SSE41-NEXT:    movdqu (%rsi), %xmm1
+; X64-SSE41-NEXT:    pxor %xmm0, %xmm1
+; X64-SSE41-NEXT:    ptest %xmm1, %xmm1
+; X64-SSE41-NEXT:    setne %al
+; X64-SSE41-NEXT:    retq
+;
 ; X64-AVX-LABEL: length16_eq:
 ; X64-AVX:       # %bb.0:
 ; X64-AVX-NEXT:    vmovdqu (%rdi), %xmm0
@@ -1007,19 +1031,97 @@ define i1 @length16_eq(i8* %x, i8* %y) nounwind {
 ; X64-AVX-NEXT:    vptest %xmm0, %xmm0
 ; X64-AVX-NEXT:    setne %al
 ; X64-AVX-NEXT:    retq
-;
-; X64-AVX512-LABEL: length16_eq:
-; X64-AVX512:       # %bb.0:
-; X64-AVX512-NEXT:    vmovdqu (%rdi), %xmm0
-; X64-AVX512-NEXT:    vpxor (%rsi), %xmm0, %xmm0
-; X64-AVX512-NEXT:    vptest %xmm0, %xmm0
-; X64-AVX512-NEXT:    setne %al
-; X64-AVX512-NEXT:    retq
   %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 16) nounwind
   %cmp = icmp ne i32 %call, 0
   ret i1 %cmp
 }
 
+define i1 @length16_lt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length16_lt:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $16
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    shrl $31, %eax
+; X86-NEXT:    # kill: def $al killed $al killed $eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: length16_lt:
+; X64:       # %bb.0:
+; X64-NEXT:    movq (%rdi), %rcx
+; X64-NEXT:    movq (%rsi), %rdx
+; X64-NEXT:    bswapq %rcx
+; X64-NEXT:    bswapq %rdx
+; X64-NEXT:    cmpq %rdx, %rcx
+; X64-NEXT:    jne .LBB33_2
+; X64-NEXT:  # %bb.1: # %loadbb1
+; X64-NEXT:    movq 8(%rdi), %rcx
+; X64-NEXT:    movq 8(%rsi), %rdx
+; X64-NEXT:    bswapq %rcx
+; X64-NEXT:    bswapq %rdx
+; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    cmpq %rdx, %rcx
+; X64-NEXT:    je .LBB33_3
+; X64-NEXT:  .LBB33_2: # %res_block
+; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    cmpq %rdx, %rcx
+; X64-NEXT:    setae %al
+; X64-NEXT:    leal -1(%rax,%rax), %eax
+; X64-NEXT:  .LBB33_3: # %endblock
+; X64-NEXT:    shrl $31, %eax
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    retq
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 16) nounwind
+  %cmp = icmp slt i32 %call, 0
+  ret i1 %cmp
+}
+
+define i1 @length16_gt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length16_gt:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $16
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    setg %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: length16_gt:
+; X64:       # %bb.0:
+; X64-NEXT:    movq (%rdi), %rax
+; X64-NEXT:    movq (%rsi), %rcx
+; X64-NEXT:    bswapq %rax
+; X64-NEXT:    bswapq %rcx
+; X64-NEXT:    cmpq %rcx, %rax
+; X64-NEXT:    jne .LBB34_2
+; X64-NEXT:  # %bb.1: # %loadbb1
+; X64-NEXT:    movq 8(%rdi), %rax
+; X64-NEXT:    movq 8(%rsi), %rcx
+; X64-NEXT:    bswapq %rax
+; X64-NEXT:    bswapq %rcx
+; X64-NEXT:    xorl %edx, %edx
+; X64-NEXT:    cmpq %rcx, %rax
+; X64-NEXT:    je .LBB34_3
+; X64-NEXT:  .LBB34_2: # %res_block
+; X64-NEXT:    xorl %edx, %edx
+; X64-NEXT:    cmpq %rcx, %rax
+; X64-NEXT:    setae %dl
+; X64-NEXT:    leal -1(%rdx,%rdx), %edx
+; X64-NEXT:  .LBB34_3: # %endblock
+; X64-NEXT:    testl %edx, %edx
+; X64-NEXT:    setg %al
+; X64-NEXT:    retq
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 16) nounwind
+  %cmp = icmp sgt i32 %call, 0
+  ret i1 %cmp
+}
+
 define i1 @length16_eq_const(i8* %X) nounwind {
 ; X86-NOSSE-LABEL: length16_eq_const:
 ; X86-NOSSE:       # %bb.0:
@@ -1055,6 +1157,15 @@ define i1 @length16_eq_const(i8* %X) nounwind {
 ; X86-SSE2-NEXT:    sete %al
 ; X86-SSE2-NEXT:    retl
 ;
+; X86-SSE41-LABEL: length16_eq_const:
+; X86-SSE41:       # %bb.0:
+; X86-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE41-NEXT:    movdqu (%eax), %xmm0
+; X86-SSE41-NEXT:    pxor {{\.LCPI.*}}, %xmm0
+; X86-SSE41-NEXT:    ptest %xmm0, %xmm0
+; X86-SSE41-NEXT:    sete %al
+; X86-SSE41-NEXT:    retl
+;
 ; X64-SSE2-LABEL: length16_eq_const:
 ; X64-SSE2:       # %bb.0:
 ; X64-SSE2-NEXT:    movdqu (%rdi), %xmm0
@@ -1064,6 +1175,14 @@ define i1 @length16_eq_const(i8* %X) nounwind {
 ; X64-SSE2-NEXT:    sete %al
 ; X64-SSE2-NEXT:    retq
 ;
+; X64-SSE41-LABEL: length16_eq_const:
+; X64-SSE41:       # %bb.0:
+; X64-SSE41-NEXT:    movdqu (%rdi), %xmm0
+; X64-SSE41-NEXT:    pxor {{.*}}(%rip), %xmm0
+; X64-SSE41-NEXT:    ptest %xmm0, %xmm0
+; X64-SSE41-NEXT:    sete %al
+; X64-SSE41-NEXT:    retq
+;
 ; X64-AVX-LABEL: length16_eq_const:
 ; X64-AVX:       # %bb.0:
 ; X64-AVX-NEXT:    vmovdqu (%rdi), %xmm0
@@ -1071,15 +1190,7 @@ define i1 @length16_eq_const(i8* %X) nounwind {
 ; X64-AVX-NEXT:    vptest %xmm0, %xmm0
 ; X64-AVX-NEXT:    sete %al
 ; X64-AVX-NEXT:    retq
-;
-; X64-AVX512-LABEL: length16_eq_const:
-; X64-AVX512:       # %bb.0:
-; X64-AVX512-NEXT:    vmovdqu (%rdi), %xmm0
-; X64-AVX512-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
-; X64-AVX512-NEXT:    vptest %xmm0, %xmm0
-; X64-AVX512-NEXT:    sete %al
-; X64-AVX512-NEXT:    retq
-  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 16) nounwind
+  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 16) nounwind
   %c = icmp eq i32 %m, 0
   ret i1 %c
 }
@@ -1146,6 +1257,21 @@ define i1 @length24_eq(i8* %x, i8* %y) nounwind {
 ; X86-SSE2-NEXT:    sete %al
 ; X86-SSE2-NEXT:    retl
 ;
+; X86-SSE41-LABEL: length24_eq:
+; X86-SSE41:       # %bb.0:
+; X86-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE41-NEXT:    movdqu (%ecx), %xmm0
+; X86-SSE41-NEXT:    movdqu 8(%ecx), %xmm1
+; X86-SSE41-NEXT:    movdqu (%eax), %xmm2
+; X86-SSE41-NEXT:    pxor %xmm0, %xmm2
+; X86-SSE41-NEXT:    movdqu 8(%eax), %xmm0
+; X86-SSE41-NEXT:    pxor %xmm1, %xmm0
+; X86-SSE41-NEXT:    por %xmm2, %xmm0
+; X86-SSE41-NEXT:    ptest %xmm0, %xmm0
+; X86-SSE41-NEXT:    sete %al
+; X86-SSE41-NEXT:    retl
+;
 ; X64-SSE2-LABEL: length24_eq:
 ; X64-SSE2:       # %bb.0:
 ; X64-SSE2-NEXT:    movdqu (%rdi), %xmm0
@@ -1160,6 +1286,19 @@ define i1 @length24_eq(i8* %x, i8* %y) nounwind {
 ; X64-SSE2-NEXT:    sete %al
 ; X64-SSE2-NEXT:    retq
 ;
+; X64-SSE41-LABEL: length24_eq:
+; X64-SSE41:       # %bb.0:
+; X64-SSE41-NEXT:    movdqu (%rdi), %xmm0
+; X64-SSE41-NEXT:    movdqu (%rsi), %xmm1
+; X64-SSE41-NEXT:    pxor %xmm0, %xmm1
+; X64-SSE41-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
+; X64-SSE41-NEXT:    movq {{.*#+}} xmm2 = mem[0],zero
+; X64-SSE41-NEXT:    pxor %xmm0, %xmm2
+; X64-SSE41-NEXT:    por %xmm1, %xmm2
+; X64-SSE41-NEXT:    ptest %xmm2, %xmm2
+; X64-SSE41-NEXT:    sete %al
+; X64-SSE41-NEXT:    retq
+;
 ; X64-AVX-LABEL: length24_eq:
 ; X64-AVX:       # %bb.0:
 ; X64-AVX-NEXT:    vmovdqu (%rdi), %xmm0
@@ -1171,23 +1310,65 @@ define i1 @length24_eq(i8* %x, i8* %y) nounwind {
 ; X64-AVX-NEXT:    vptest %xmm0, %xmm0
 ; X64-AVX-NEXT:    sete %al
 ; X64-AVX-NEXT:    retq
-;
-; X64-AVX512-LABEL: length24_eq:
-; X64-AVX512:       # %bb.0:
-; X64-AVX512-NEXT:    vmovdqu (%rdi), %xmm0
-; X64-AVX512-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
-; X64-AVX512-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
-; X64-AVX512-NEXT:    vpxor %xmm2, %xmm1, %xmm1
-; X64-AVX512-NEXT:    vpxor (%rsi), %xmm0, %xmm0
-; X64-AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; X64-AVX512-NEXT:    vptest %xmm0, %xmm0
-; X64-AVX512-NEXT:    sete %al
-; X64-AVX512-NEXT:    retq
   %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 24) nounwind
   %cmp = icmp eq i32 %call, 0
   ret i1 %cmp
 }
 
+define i1 @length24_lt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length24_lt:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $24
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    shrl $31, %eax
+; X86-NEXT:    # kill: def $al killed $al killed $eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: length24_lt:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movl $24, %edx
+; X64-NEXT:    callq memcmp
+; X64-NEXT:    shrl $31, %eax
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 24) nounwind
+  %cmp = icmp slt i32 %call, 0
+  ret i1 %cmp
+}
+
+define i1 @length24_gt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length24_gt:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $24
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    setg %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: length24_gt:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movl $24, %edx
+; X64-NEXT:    callq memcmp
+; X64-NEXT:    testl %eax, %eax
+; X64-NEXT:    setg %al
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 24) nounwind
+  %cmp = icmp sgt i32 %call, 0
+  ret i1 %cmp
+}
+
 define i1 @length24_eq_const(i8* %X) nounwind {
 ; X86-NOSSE-LABEL: length24_eq_const:
 ; X86-NOSSE:       # %bb.0:
@@ -1226,6 +1407,18 @@ define i1 @length24_eq_const(i8* %X) nounwind {
 ; X86-SSE2-NEXT:    setne %al
 ; X86-SSE2-NEXT:    retl
 ;
+; X86-SSE41-LABEL: length24_eq_const:
+; X86-SSE41:       # %bb.0:
+; X86-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE41-NEXT:    movdqu (%eax), %xmm0
+; X86-SSE41-NEXT:    movdqu 8(%eax), %xmm1
+; X86-SSE41-NEXT:    pxor {{\.LCPI.*}}, %xmm1
+; X86-SSE41-NEXT:    pxor {{\.LCPI.*}}, %xmm0
+; X86-SSE41-NEXT:    por %xmm1, %xmm0
+; X86-SSE41-NEXT:    ptest %xmm0, %xmm0
+; X86-SSE41-NEXT:    setne %al
+; X86-SSE41-NEXT:    retl
+;
 ; X64-SSE2-LABEL: length24_eq_const:
 ; X64-SSE2:       # %bb.0:
 ; X64-SSE2-NEXT:    movdqu (%rdi), %xmm0
@@ -1238,6 +1431,17 @@ define i1 @length24_eq_const(i8* %X) nounwind {
 ; X64-SSE2-NEXT:    setne %al
 ; X64-SSE2-NEXT:    retq
 ;
+; X64-SSE41-LABEL: length24_eq_const:
+; X64-SSE41:       # %bb.0:
+; X64-SSE41-NEXT:    movdqu (%rdi), %xmm0
+; X64-SSE41-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
+; X64-SSE41-NEXT:    pxor {{.*}}(%rip), %xmm1
+; X64-SSE41-NEXT:    pxor {{.*}}(%rip), %xmm0
+; X64-SSE41-NEXT:    por %xmm1, %xmm0
+; X64-SSE41-NEXT:    ptest %xmm0, %xmm0
+; X64-SSE41-NEXT:    setne %al
+; X64-SSE41-NEXT:    retq
+;
 ; X64-AVX-LABEL: length24_eq_const:
 ; X64-AVX:       # %bb.0:
 ; X64-AVX-NEXT:    vmovdqu (%rdi), %xmm0
@@ -1248,48 +1452,35 @@ define i1 @length24_eq_const(i8* %X) nounwind {
 ; X64-AVX-NEXT:    vptest %xmm0, %xmm0
 ; X64-AVX-NEXT:    setne %al
 ; X64-AVX-NEXT:    retq
-;
-; X64-AVX512-LABEL: length24_eq_const:
-; X64-AVX512:       # %bb.0:
-; X64-AVX512-NEXT:    vmovdqu (%rdi), %xmm0
-; X64-AVX512-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
-; X64-AVX512-NEXT:    vpxor {{.*}}(%rip), %xmm1, %xmm1
-; X64-AVX512-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
-; X64-AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; X64-AVX512-NEXT:    vptest %xmm0, %xmm0
-; X64-AVX512-NEXT:    setne %al
-; X64-AVX512-NEXT:    retq
-  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 24) nounwind
+  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 24) nounwind
   %c = icmp ne i32 %m, 0
   ret i1 %c
 }
 
-define i32 @length32(i8* %X, i8* %Y) nounwind {
-; X86-LABEL: length32:
+define i32 @length31(i8* %X, i8* %Y) nounwind {
+; X86-LABEL: length31:
 ; X86:       # %bb.0:
 ; X86-NEXT:    pushl $0
-; X86-NEXT:    pushl $32
+; X86-NEXT:    pushl $31
 ; X86-NEXT:    pushl {{[0-9]+}}(%esp)
 ; X86-NEXT:    pushl {{[0-9]+}}(%esp)
 ; X86-NEXT:    calll memcmp
 ; X86-NEXT:    addl $16, %esp
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: length32:
+; X64-LABEL: length31:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl $32, %edx
+; X64-NEXT:    movl $31, %edx
 ; X64-NEXT:    jmp memcmp # TAILCALL
-  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 32) nounwind
+  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 31) nounwind
   ret i32 %m
 }
 
-; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325
-
-define i1 @length32_eq(i8* %x, i8* %y) nounwind {
-; X86-NOSSE-LABEL: length32_eq:
+define i1 @length31_eq(i8* %x, i8* %y) nounwind {
+; X86-NOSSE-LABEL: length31_eq:
 ; X86-NOSSE:       # %bb.0:
 ; X86-NOSSE-NEXT:    pushl $0
-; X86-NOSSE-NEXT:    pushl $32
+; X86-NOSSE-NEXT:    pushl $31
 ; X86-NOSSE-NEXT:    pushl {{[0-9]+}}(%esp)
 ; X86-NOSSE-NEXT:    pushl {{[0-9]+}}(%esp)
 ; X86-NOSSE-NEXT:    calll memcmp
@@ -1298,10 +1489,10 @@ define i1 @length32_eq(i8* %x, i8* %y) nounwind {
 ; X86-NOSSE-NEXT:    sete %al
 ; X86-NOSSE-NEXT:    retl
 ;
-; X86-SSE1-LABEL: length32_eq:
+; X86-SSE1-LABEL: length31_eq:
 ; X86-SSE1:       # %bb.0:
 ; X86-SSE1-NEXT:    pushl $0
-; X86-SSE1-NEXT:    pushl $32
+; X86-SSE1-NEXT:    pushl $31
 ; X86-SSE1-NEXT:    pushl {{[0-9]+}}(%esp)
 ; X86-SSE1-NEXT:    pushl {{[0-9]+}}(%esp)
 ; X86-SSE1-NEXT:    calll memcmp
@@ -1310,15 +1501,15 @@ define i1 @length32_eq(i8* %x, i8* %y) nounwind {
 ; X86-SSE1-NEXT:    sete %al
 ; X86-SSE1-NEXT:    retl
 ;
-; X86-SSE2-LABEL: length32_eq:
+; X86-SSE2-LABEL: length31_eq:
 ; X86-SSE2:       # %bb.0:
 ; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-SSE2-NEXT:    movdqu (%ecx), %xmm0
-; X86-SSE2-NEXT:    movdqu 16(%ecx), %xmm1
+; X86-SSE2-NEXT:    movdqu 15(%ecx), %xmm1
 ; X86-SSE2-NEXT:    movdqu (%eax), %xmm2
 ; X86-SSE2-NEXT:    pcmpeqb %xmm0, %xmm2
-; X86-SSE2-NEXT:    movdqu 16(%eax), %xmm0
+; X86-SSE2-NEXT:    movdqu 15(%eax), %xmm0
 ; X86-SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
 ; X86-SSE2-NEXT:    pand %xmm2, %xmm0
 ; X86-SSE2-NEXT:    pmovmskb %xmm0, %eax
@@ -1326,13 +1517,28 @@ define i1 @length32_eq(i8* %x, i8* %y) nounwind {
 ; X86-SSE2-NEXT:    sete %al
 ; X86-SSE2-NEXT:    retl
 ;
-; X64-SSE2-LABEL: length32_eq:
+; X86-SSE41-LABEL: length31_eq:
+; X86-SSE41:       # %bb.0:
+; X86-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE41-NEXT:    movdqu (%ecx), %xmm0
+; X86-SSE41-NEXT:    movdqu 15(%ecx), %xmm1
+; X86-SSE41-NEXT:    movdqu (%eax), %xmm2
+; X86-SSE41-NEXT:    pxor %xmm0, %xmm2
+; X86-SSE41-NEXT:    movdqu 15(%eax), %xmm0
+; X86-SSE41-NEXT:    pxor %xmm1, %xmm0
+; X86-SSE41-NEXT:    por %xmm2, %xmm0
+; X86-SSE41-NEXT:    ptest %xmm0, %xmm0
+; X86-SSE41-NEXT:    sete %al
+; X86-SSE41-NEXT:    retl
+;
+; X64-SSE2-LABEL: length31_eq:
 ; X64-SSE2:       # %bb.0:
 ; X64-SSE2-NEXT:    movdqu (%rdi), %xmm0
-; X64-SSE2-NEXT:    movdqu 16(%rdi), %xmm1
+; X64-SSE2-NEXT:    movdqu 15(%rdi), %xmm1
 ; X64-SSE2-NEXT:    movdqu (%rsi), %xmm2
 ; X64-SSE2-NEXT:    pcmpeqb %xmm0, %xmm2
-; X64-SSE2-NEXT:    movdqu 16(%rsi), %xmm0
+; X64-SSE2-NEXT:    movdqu 15(%rsi), %xmm0
 ; X64-SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
 ; X64-SSE2-NEXT:    pand %xmm2, %xmm0
 ; X64-SSE2-NEXT:    pmovmskb %xmm0, %eax
@@ -1340,44 +1546,93 @@ define i1 @length32_eq(i8* %x, i8* %y) nounwind {
 ; X64-SSE2-NEXT:    sete %al
 ; X64-SSE2-NEXT:    retq
 ;
-; X64-AVX1-LABEL: length32_eq:
-; X64-AVX1:       # %bb.0:
-; X64-AVX1-NEXT:    vmovdqu (%rdi), %xmm0
-; X64-AVX1-NEXT:    vmovdqu 16(%rdi), %xmm1
-; X64-AVX1-NEXT:    vpxor 16(%rsi), %xmm1, %xmm1
-; X64-AVX1-NEXT:    vpxor (%rsi), %xmm0, %xmm0
-; X64-AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT:    vptest %xmm0, %xmm0
-; X64-AVX1-NEXT:    sete %al
-; X64-AVX1-NEXT:    retq
-;
-; X64-AVX2-LABEL: length32_eq:
-; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    vmovdqu (%rdi), %ymm0
-; X64-AVX2-NEXT:    vpxor (%rsi), %ymm0, %ymm0
-; X64-AVX2-NEXT:    vptest %ymm0, %ymm0
-; X64-AVX2-NEXT:    sete %al
-; X64-AVX2-NEXT:    vzeroupper
-; X64-AVX2-NEXT:    retq
+; X64-SSE41-LABEL: length31_eq:
+; X64-SSE41:       # %bb.0:
+; X64-SSE41-NEXT:    movdqu (%rdi), %xmm0
+; X64-SSE41-NEXT:    movdqu 15(%rdi), %xmm1
+; X64-SSE41-NEXT:    movdqu (%rsi), %xmm2
+; X64-SSE41-NEXT:    pxor %xmm0, %xmm2
+; X64-SSE41-NEXT:    movdqu 15(%rsi), %xmm0
+; X64-SSE41-NEXT:    pxor %xmm1, %xmm0
+; X64-SSE41-NEXT:    por %xmm2, %xmm0
+; X64-SSE41-NEXT:    ptest %xmm0, %xmm0
+; X64-SSE41-NEXT:    sete %al
+; X64-SSE41-NEXT:    retq
 ;
-; X64-AVX512-LABEL: length32_eq:
-; X64-AVX512:       # %bb.0:
-; X64-AVX512-NEXT:    vmovdqu (%rdi), %ymm0
-; X64-AVX512-NEXT:    vpxor (%rsi), %ymm0, %ymm0
-; X64-AVX512-NEXT:    vptest %ymm0, %ymm0
-; X64-AVX512-NEXT:    sete %al
-; X64-AVX512-NEXT:    vzeroupper
-; X64-AVX512-NEXT:    retq
-  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 32) nounwind
+; X64-AVX-LABEL: length31_eq:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    vmovdqu (%rdi), %xmm0
+; X64-AVX-NEXT:    vmovdqu 15(%rdi), %xmm1
+; X64-AVX-NEXT:    vpxor 15(%rsi), %xmm1, %xmm1
+; X64-AVX-NEXT:    vpxor (%rsi), %xmm0, %xmm0
+; X64-AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT:    vptest %xmm0, %xmm0
+; X64-AVX-NEXT:    sete %al
+; X64-AVX-NEXT:    retq
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 31) nounwind
   %cmp = icmp eq i32 %call, 0
   ret i1 %cmp
 }
 
-define i1 @length32_eq_prefer128(i8* %x, i8* %y) nounwind "prefer-vector-width"="128" {
-; X86-NOSSE-LABEL: length32_eq_prefer128:
+define i1 @length31_lt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length31_lt:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $31
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    shrl $31, %eax
+; X86-NEXT:    # kill: def $al killed $al killed $eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: length31_lt:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movl $31, %edx
+; X64-NEXT:    callq memcmp
+; X64-NEXT:    shrl $31, %eax
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 31) nounwind
+  %cmp = icmp slt i32 %call, 0
+  ret i1 %cmp
+}
+
+define i1 @length31_gt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length31_gt:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $31
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    setg %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: length31_gt:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movl $31, %edx
+; X64-NEXT:    callq memcmp
+; X64-NEXT:    testl %eax, %eax
+; X64-NEXT:    setg %al
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 31) nounwind
+  %cmp = icmp sgt i32 %call, 0
+  ret i1 %cmp
+}
+
+define i1 @length31_eq_prefer128(i8* %x, i8* %y) nounwind "prefer-vector-width"="128" {
+; X86-NOSSE-LABEL: length31_eq_prefer128:
 ; X86-NOSSE:       # %bb.0:
 ; X86-NOSSE-NEXT:    pushl $0
-; X86-NOSSE-NEXT:    pushl $32
+; X86-NOSSE-NEXT:    pushl $31
 ; X86-NOSSE-NEXT:    pushl {{[0-9]+}}(%esp)
 ; X86-NOSSE-NEXT:    pushl {{[0-9]+}}(%esp)
 ; X86-NOSSE-NEXT:    calll memcmp
@@ -1386,10 +1641,10 @@ define i1 @length32_eq_prefer128(i8* %x, i8* %y) nounwind "prefer-vector-width"=
 ; X86-NOSSE-NEXT:    sete %al
 ; X86-NOSSE-NEXT:    retl
 ;
-; X86-SSE1-LABEL: length32_eq_prefer128:
+; X86-SSE1-LABEL: length31_eq_prefer128:
 ; X86-SSE1:       # %bb.0:
 ; X86-SSE1-NEXT:    pushl $0
-; X86-SSE1-NEXT:    pushl $32
+; X86-SSE1-NEXT:    pushl $31
 ; X86-SSE1-NEXT:    pushl {{[0-9]+}}(%esp)
 ; X86-SSE1-NEXT:    pushl {{[0-9]+}}(%esp)
 ; X86-SSE1-NEXT:    calll memcmp
@@ -1398,15 +1653,15 @@ define i1 @length32_eq_prefer128(i8* %x, i8* %y) nounwind "prefer-vector-width"=
 ; X86-SSE1-NEXT:    sete %al
 ; X86-SSE1-NEXT:    retl
 ;
-; X86-SSE2-LABEL: length32_eq_prefer128:
+; X86-SSE2-LABEL: length31_eq_prefer128:
 ; X86-SSE2:       # %bb.0:
 ; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-SSE2-NEXT:    movdqu (%ecx), %xmm0
-; X86-SSE2-NEXT:    movdqu 16(%ecx), %xmm1
+; X86-SSE2-NEXT:    movdqu 15(%ecx), %xmm1
 ; X86-SSE2-NEXT:    movdqu (%eax), %xmm2
 ; X86-SSE2-NEXT:    pcmpeqb %xmm0, %xmm2
-; X86-SSE2-NEXT:    movdqu 16(%eax), %xmm0
+; X86-SSE2-NEXT:    movdqu 15(%eax), %xmm0
 ; X86-SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
 ; X86-SSE2-NEXT:    pand %xmm2, %xmm0
 ; X86-SSE2-NEXT:    pmovmskb %xmm0, %eax
@@ -1414,13 +1669,28 @@ define i1 @length32_eq_prefer128(i8* %x, i8* %y) nounwind "prefer-vector-width"=
 ; X86-SSE2-NEXT:    sete %al
 ; X86-SSE2-NEXT:    retl
 ;
-; X64-SSE2-LABEL: length32_eq_prefer128:
+; X86-SSE41-LABEL: length31_eq_prefer128:
+; X86-SSE41:       # %bb.0:
+; X86-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE41-NEXT:    movdqu (%ecx), %xmm0
+; X86-SSE41-NEXT:    movdqu 15(%ecx), %xmm1
+; X86-SSE41-NEXT:    movdqu (%eax), %xmm2
+; X86-SSE41-NEXT:    pxor %xmm0, %xmm2
+; X86-SSE41-NEXT:    movdqu 15(%eax), %xmm0
+; X86-SSE41-NEXT:    pxor %xmm1, %xmm0
+; X86-SSE41-NEXT:    por %xmm2, %xmm0
+; X86-SSE41-NEXT:    ptest %xmm0, %xmm0
+; X86-SSE41-NEXT:    sete %al
+; X86-SSE41-NEXT:    retl
+;
+; X64-SSE2-LABEL: length31_eq_prefer128:
 ; X64-SSE2:       # %bb.0:
 ; X64-SSE2-NEXT:    movdqu (%rdi), %xmm0
-; X64-SSE2-NEXT:    movdqu 16(%rdi), %xmm1
+; X64-SSE2-NEXT:    movdqu 15(%rdi), %xmm1
 ; X64-SSE2-NEXT:    movdqu (%rsi), %xmm2
 ; X64-SSE2-NEXT:    pcmpeqb %xmm0, %xmm2
-; X64-SSE2-NEXT:    movdqu 16(%rsi), %xmm0
+; X64-SSE2-NEXT:    movdqu 15(%rsi), %xmm0
 ; X64-SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
 ; X64-SSE2-NEXT:    pand %xmm2, %xmm0
 ; X64-SSE2-NEXT:    pmovmskb %xmm0, %eax
@@ -1428,37 +1698,39 @@ define i1 @length32_eq_prefer128(i8* %x, i8* %y) nounwind "prefer-vector-width"=
 ; X64-SSE2-NEXT:    sete %al
 ; X64-SSE2-NEXT:    retq
 ;
-; X64-AVX-LABEL: length32_eq_prefer128:
+; X64-SSE41-LABEL: length31_eq_prefer128:
+; X64-SSE41:       # %bb.0:
+; X64-SSE41-NEXT:    movdqu (%rdi), %xmm0
+; X64-SSE41-NEXT:    movdqu 15(%rdi), %xmm1
+; X64-SSE41-NEXT:    movdqu (%rsi), %xmm2
+; X64-SSE41-NEXT:    pxor %xmm0, %xmm2
+; X64-SSE41-NEXT:    movdqu 15(%rsi), %xmm0
+; X64-SSE41-NEXT:    pxor %xmm1, %xmm0
+; X64-SSE41-NEXT:    por %xmm2, %xmm0
+; X64-SSE41-NEXT:    ptest %xmm0, %xmm0
+; X64-SSE41-NEXT:    sete %al
+; X64-SSE41-NEXT:    retq
+;
+; X64-AVX-LABEL: length31_eq_prefer128:
 ; X64-AVX:       # %bb.0:
 ; X64-AVX-NEXT:    vmovdqu (%rdi), %xmm0
-; X64-AVX-NEXT:    vmovdqu 16(%rdi), %xmm1
-; X64-AVX-NEXT:    vpxor 16(%rsi), %xmm1, %xmm1
+; X64-AVX-NEXT:    vmovdqu 15(%rdi), %xmm1
+; X64-AVX-NEXT:    vpxor 15(%rsi), %xmm1, %xmm1
 ; X64-AVX-NEXT:    vpxor (%rsi), %xmm0, %xmm0
 ; X64-AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
 ; X64-AVX-NEXT:    vptest %xmm0, %xmm0
 ; X64-AVX-NEXT:    sete %al
 ; X64-AVX-NEXT:    retq
-;
-; X64-AVX512-LABEL: length32_eq_prefer128:
-; X64-AVX512:       # %bb.0:
-; X64-AVX512-NEXT:    vmovdqu (%rdi), %xmm0
-; X64-AVX512-NEXT:    vmovdqu 16(%rdi), %xmm1
-; X64-AVX512-NEXT:    vpxor 16(%rsi), %xmm1, %xmm1
-; X64-AVX512-NEXT:    vpxor (%rsi), %xmm0, %xmm0
-; X64-AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; X64-AVX512-NEXT:    vptest %xmm0, %xmm0
-; X64-AVX512-NEXT:    sete %al
-; X64-AVX512-NEXT:    retq
-  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 32) nounwind
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 31) nounwind
   %cmp = icmp eq i32 %call, 0
   ret i1 %cmp
 }
 
-define i1 @length32_eq_const(i8* %X) nounwind {
-; X86-NOSSE-LABEL: length32_eq_const:
+define i1 @length31_eq_const(i8* %X) nounwind {
+; X86-NOSSE-LABEL: length31_eq_const:
 ; X86-NOSSE:       # %bb.0:
 ; X86-NOSSE-NEXT:    pushl $0
-; X86-NOSSE-NEXT:    pushl $32
+; X86-NOSSE-NEXT:    pushl $31
 ; X86-NOSSE-NEXT:    pushl $.L.str
 ; X86-NOSSE-NEXT:    pushl {{[0-9]+}}(%esp)
 ; X86-NOSSE-NEXT:    calll memcmp
@@ -1467,10 +1739,10 @@ define i1 @length32_eq_const(i8* %X) nounwind {
 ; X86-NOSSE-NEXT:    setne %al
 ; X86-NOSSE-NEXT:    retl
 ;
-; X86-SSE1-LABEL: length32_eq_const:
+; X86-SSE1-LABEL: length31_eq_const:
 ; X86-SSE1:       # %bb.0:
 ; X86-SSE1-NEXT:    pushl $0
-; X86-SSE1-NEXT:    pushl $32
+; X86-SSE1-NEXT:    pushl $31
 ; X86-SSE1-NEXT:    pushl $.L.str
 ; X86-SSE1-NEXT:    pushl {{[0-9]+}}(%esp)
 ; X86-SSE1-NEXT:    calll memcmp
@@ -1479,11 +1751,11 @@ define i1 @length32_eq_const(i8* %X) nounwind {
 ; X86-SSE1-NEXT:    setne %al
 ; X86-SSE1-NEXT:    retl
 ;
-; X86-SSE2-LABEL: length32_eq_const:
+; X86-SSE2-LABEL: length31_eq_const:
 ; X86-SSE2:       # %bb.0:
 ; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-SSE2-NEXT:    movdqu (%eax), %xmm0
-; X86-SSE2-NEXT:    movdqu 16(%eax), %xmm1
+; X86-SSE2-NEXT:    movdqu 15(%eax), %xmm1
 ; X86-SSE2-NEXT:    pcmpeqb {{\.LCPI.*}}, %xmm1
 ; X86-SSE2-NEXT:    pcmpeqb {{\.LCPI.*}}, %xmm0
 ; X86-SSE2-NEXT:    pand %xmm1, %xmm0
@@ -1492,10 +1764,22 @@ define i1 @length32_eq_const(i8* %X) nounwind {
 ; X86-SSE2-NEXT:    setne %al
 ; X86-SSE2-NEXT:    retl
 ;
-; X64-SSE2-LABEL: length32_eq_const:
+; X86-SSE41-LABEL: length31_eq_const:
+; X86-SSE41:       # %bb.0:
+; X86-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE41-NEXT:    movdqu (%eax), %xmm0
+; X86-SSE41-NEXT:    movdqu 15(%eax), %xmm1
+; X86-SSE41-NEXT:    pxor {{\.LCPI.*}}, %xmm1
+; X86-SSE41-NEXT:    pxor {{\.LCPI.*}}, %xmm0
+; X86-SSE41-NEXT:    por %xmm1, %xmm0
+; X86-SSE41-NEXT:    ptest %xmm0, %xmm0
+; X86-SSE41-NEXT:    setne %al
+; X86-SSE41-NEXT:    retl
+;
+; X64-SSE2-LABEL: length31_eq_const:
 ; X64-SSE2:       # %bb.0:
 ; X64-SSE2-NEXT:    movdqu (%rdi), %xmm0
-; X64-SSE2-NEXT:    movdqu 16(%rdi), %xmm1
+; X64-SSE2-NEXT:    movdqu 15(%rdi), %xmm1
 ; X64-SSE2-NEXT:    pcmpeqb {{.*}}(%rip), %xmm1
 ; X64-SSE2-NEXT:    pcmpeqb {{.*}}(%rip), %xmm0
 ; X64-SSE2-NEXT:    pand %xmm1, %xmm0
@@ -1504,63 +1788,2829 @@ define i1 @length32_eq_const(i8* %X) nounwind {
 ; X64-SSE2-NEXT:    setne %al
 ; X64-SSE2-NEXT:    retq
 ;
-; X64-AVX1-LABEL: length32_eq_const:
+; X64-SSE41-LABEL: length31_eq_const:
+; X64-SSE41:       # %bb.0:
+; X64-SSE41-NEXT:    movdqu (%rdi), %xmm0
+; X64-SSE41-NEXT:    movdqu 15(%rdi), %xmm1
+; X64-SSE41-NEXT:    pxor {{.*}}(%rip), %xmm1
+; X64-SSE41-NEXT:    pxor {{.*}}(%rip), %xmm0
+; X64-SSE41-NEXT:    por %xmm1, %xmm0
+; X64-SSE41-NEXT:    ptest %xmm0, %xmm0
+; X64-SSE41-NEXT:    setne %al
+; X64-SSE41-NEXT:    retq
+;
+; X64-AVX-LABEL: length31_eq_const:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    vmovdqu (%rdi), %xmm0
+; X64-AVX-NEXT:    vmovdqu 15(%rdi), %xmm1
+; X64-AVX-NEXT:    vpxor {{.*}}(%rip), %xmm1, %xmm1
+; X64-AVX-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT:    vptest %xmm0, %xmm0
+; X64-AVX-NEXT:    setne %al
+; X64-AVX-NEXT:    retq
+  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 31) nounwind
+  %c = icmp ne i32 %m, 0
+  ret i1 %c
+}
+
+define i32 @length32(i8* %X, i8* %Y) nounwind {
+; X86-LABEL: length32:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $32
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    retl
+;
+; X64-LABEL: length32:
+; X64:       # %bb.0:
+; X64-NEXT:    movl $32, %edx
+; X64-NEXT:    jmp memcmp # TAILCALL
+  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 32) nounwind
+  ret i32 %m
+}
+
+; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325
+
+define i1 @length32_eq(i8* %x, i8* %y) nounwind {
+; X86-NOSSE-LABEL: length32_eq:
+; X86-NOSSE:       # %bb.0:
+; X86-NOSSE-NEXT:    pushl $0
+; X86-NOSSE-NEXT:    pushl $32
+; X86-NOSSE-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    calll memcmp
+; X86-NOSSE-NEXT:    addl $16, %esp
+; X86-NOSSE-NEXT:    testl %eax, %eax
+; X86-NOSSE-NEXT:    sete %al
+; X86-NOSSE-NEXT:    retl
+;
+; X86-SSE1-LABEL: length32_eq:
+; X86-SSE1:       # %bb.0:
+; X86-SSE1-NEXT:    pushl $0
+; X86-SSE1-NEXT:    pushl $32
+; X86-SSE1-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT:    calll memcmp
+; X86-SSE1-NEXT:    addl $16, %esp
+; X86-SSE1-NEXT:    testl %eax, %eax
+; X86-SSE1-NEXT:    sete %al
+; X86-SSE1-NEXT:    retl
+;
+; X86-SSE2-LABEL: length32_eq:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE2-NEXT:    movdqu (%ecx), %xmm0
+; X86-SSE2-NEXT:    movdqu 16(%ecx), %xmm1
+; X86-SSE2-NEXT:    movdqu (%eax), %xmm2
+; X86-SSE2-NEXT:    pcmpeqb %xmm0, %xmm2
+; X86-SSE2-NEXT:    movdqu 16(%eax), %xmm0
+; X86-SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
+; X86-SSE2-NEXT:    pand %xmm2, %xmm0
+; X86-SSE2-NEXT:    pmovmskb %xmm0, %eax
+; X86-SSE2-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
+; X86-SSE2-NEXT:    sete %al
+; X86-SSE2-NEXT:    retl
+;
+; X86-SSE41-LABEL: length32_eq:
+; X86-SSE41:       # %bb.0:
+; X86-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE41-NEXT:    movdqu (%ecx), %xmm0
+; X86-SSE41-NEXT:    movdqu 16(%ecx), %xmm1
+; X86-SSE41-NEXT:    movdqu (%eax), %xmm2
+; X86-SSE41-NEXT:    pxor %xmm0, %xmm2
+; X86-SSE41-NEXT:    movdqu 16(%eax), %xmm0
+; X86-SSE41-NEXT:    pxor %xmm1, %xmm0
+; X86-SSE41-NEXT:    por %xmm2, %xmm0
+; X86-SSE41-NEXT:    ptest %xmm0, %xmm0
+; X86-SSE41-NEXT:    sete %al
+; X86-SSE41-NEXT:    retl
+;
+; X64-SSE2-LABEL: length32_eq:
+; X64-SSE2:       # %bb.0:
+; X64-SSE2-NEXT:    movdqu (%rdi), %xmm0
+; X64-SSE2-NEXT:    movdqu 16(%rdi), %xmm1
+; X64-SSE2-NEXT:    movdqu (%rsi), %xmm2
+; X64-SSE2-NEXT:    pcmpeqb %xmm0, %xmm2
+; X64-SSE2-NEXT:    movdqu 16(%rsi), %xmm0
+; X64-SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
+; X64-SSE2-NEXT:    pand %xmm2, %xmm0
+; X64-SSE2-NEXT:    pmovmskb %xmm0, %eax
+; X64-SSE2-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
+; X64-SSE2-NEXT:    sete %al
+; X64-SSE2-NEXT:    retq
+;
+; X64-SSE41-LABEL: length32_eq:
+; X64-SSE41:       # %bb.0:
+; X64-SSE41-NEXT:    movdqu (%rdi), %xmm0
+; X64-SSE41-NEXT:    movdqu 16(%rdi), %xmm1
+; X64-SSE41-NEXT:    movdqu (%rsi), %xmm2
+; X64-SSE41-NEXT:    pxor %xmm0, %xmm2
+; X64-SSE41-NEXT:    movdqu 16(%rsi), %xmm0
+; X64-SSE41-NEXT:    pxor %xmm1, %xmm0
+; X64-SSE41-NEXT:    por %xmm2, %xmm0
+; X64-SSE41-NEXT:    ptest %xmm0, %xmm0
+; X64-SSE41-NEXT:    sete %al
+; X64-SSE41-NEXT:    retq
+;
+; X64-AVX1-LABEL: length32_eq:
 ; X64-AVX1:       # %bb.0:
 ; X64-AVX1-NEXT:    vmovdqu (%rdi), %xmm0
 ; X64-AVX1-NEXT:    vmovdqu 16(%rdi), %xmm1
-; X64-AVX1-NEXT:    vpxor {{.*}}(%rip), %xmm1, %xmm1
-; X64-AVX1-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT:    vpxor 16(%rsi), %xmm1, %xmm1
+; X64-AVX1-NEXT:    vpxor (%rsi), %xmm0, %xmm0
 ; X64-AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
 ; X64-AVX1-NEXT:    vptest %xmm0, %xmm0
-; X64-AVX1-NEXT:    setne %al
+; X64-AVX1-NEXT:    sete %al
 ; X64-AVX1-NEXT:    retq
 ;
-; X64-AVX2-LABEL: length32_eq_const:
+; X64-AVX2-LABEL: length32_eq:
 ; X64-AVX2:       # %bb.0:
 ; X64-AVX2-NEXT:    vmovdqu (%rdi), %ymm0
-; X64-AVX2-NEXT:    vpxor {{.*}}(%rip), %ymm0, %ymm0
+; X64-AVX2-NEXT:    vpxor (%rsi), %ymm0, %ymm0
 ; X64-AVX2-NEXT:    vptest %ymm0, %ymm0
-; X64-AVX2-NEXT:    setne %al
+; X64-AVX2-NEXT:    sete %al
 ; X64-AVX2-NEXT:    vzeroupper
 ; X64-AVX2-NEXT:    retq
 ;
-; X64-AVX512-LABEL: length32_eq_const:
-; X64-AVX512:       # %bb.0:
-; X64-AVX512-NEXT:    vmovdqu (%rdi), %ymm0
-; X64-AVX512-NEXT:    vpxor {{.*}}(%rip), %ymm0, %ymm0
-; X64-AVX512-NEXT:    vptest %ymm0, %ymm0
-; X64-AVX512-NEXT:    setne %al
-; X64-AVX512-NEXT:    vzeroupper
-; X64-AVX512-NEXT:    retq
-  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 32) nounwind
-  %c = icmp ne i32 %m, 0
+; X64-AVX512-LABEL: length32_eq:
+; X64-AVX512:       # %bb.0:
+; X64-AVX512-NEXT:    vmovdqu (%rdi), %ymm0
+; X64-AVX512-NEXT:    vpxor (%rsi), %ymm0, %ymm0
+; X64-AVX512-NEXT:    vptest %ymm0, %ymm0
+; X64-AVX512-NEXT:    sete %al
+; X64-AVX512-NEXT:    vzeroupper
+; X64-AVX512-NEXT:    retq
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 32) nounwind
+  %cmp = icmp eq i32 %call, 0
+  ret i1 %cmp
+}
+
+define i1 @length32_lt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length32_lt:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $32
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    shrl $31, %eax
+; X86-NEXT:    # kill: def $al killed $al killed $eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: length32_lt:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movl $32, %edx
+; X64-NEXT:    callq memcmp
+; X64-NEXT:    shrl $31, %eax
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 32) nounwind
+  %cmp = icmp slt i32 %call, 0
+  ret i1 %cmp
+}
+
+define i1 @length32_gt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length32_gt:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $32
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    setg %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: length32_gt:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movl $32, %edx
+; X64-NEXT:    callq memcmp
+; X64-NEXT:    testl %eax, %eax
+; X64-NEXT:    setg %al
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 32) nounwind
+  %cmp = icmp sgt i32 %call, 0
+  ret i1 %cmp
+}
+
+define i1 @length32_eq_prefer128(i8* %x, i8* %y) nounwind "prefer-vector-width"="128" {
+; X86-NOSSE-LABEL: length32_eq_prefer128:
+; X86-NOSSE:       # %bb.0:
+; X86-NOSSE-NEXT:    pushl $0
+; X86-NOSSE-NEXT:    pushl $32
+; X86-NOSSE-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    calll memcmp
+; X86-NOSSE-NEXT:    addl $16, %esp
+; X86-NOSSE-NEXT:    testl %eax, %eax
+; X86-NOSSE-NEXT:    sete %al
+; X86-NOSSE-NEXT:    retl
+;
+; X86-SSE1-LABEL: length32_eq_prefer128:
+; X86-SSE1:       # %bb.0:
+; X86-SSE1-NEXT:    pushl $0
+; X86-SSE1-NEXT:    pushl $32
+; X86-SSE1-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT:    calll memcmp
+; X86-SSE1-NEXT:    addl $16, %esp
+; X86-SSE1-NEXT:    testl %eax, %eax
+; X86-SSE1-NEXT:    sete %al
+; X86-SSE1-NEXT:    retl
+;
+; X86-SSE2-LABEL: length32_eq_prefer128:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE2-NEXT:    movdqu (%ecx), %xmm0
+; X86-SSE2-NEXT:    movdqu 16(%ecx), %xmm1
+; X86-SSE2-NEXT:    movdqu (%eax), %xmm2
+; X86-SSE2-NEXT:    pcmpeqb %xmm0, %xmm2
+; X86-SSE2-NEXT:    movdqu 16(%eax), %xmm0
+; X86-SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
+; X86-SSE2-NEXT:    pand %xmm2, %xmm0
+; X86-SSE2-NEXT:    pmovmskb %xmm0, %eax
+; X86-SSE2-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
+; X86-SSE2-NEXT:    sete %al
+; X86-SSE2-NEXT:    retl
+;
+; X86-SSE41-LABEL: length32_eq_prefer128:
+; X86-SSE41:       # %bb.0:
+; X86-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE41-NEXT:    movdqu (%ecx), %xmm0
+; X86-SSE41-NEXT:    movdqu 16(%ecx), %xmm1
+; X86-SSE41-NEXT:    movdqu (%eax), %xmm2
+; X86-SSE41-NEXT:    pxor %xmm0, %xmm2
+; X86-SSE41-NEXT:    movdqu 16(%eax), %xmm0
+; X86-SSE41-NEXT:    pxor %xmm1, %xmm0
+; X86-SSE41-NEXT:    por %xmm2, %xmm0
+; X86-SSE41-NEXT:    ptest %xmm0, %xmm0
+; X86-SSE41-NEXT:    sete %al
+; X86-SSE41-NEXT:    retl
+;
+; X64-SSE2-LABEL: length32_eq_prefer128:
+; X64-SSE2:       # %bb.0:
+; X64-SSE2-NEXT:    movdqu (%rdi), %xmm0
+; X64-SSE2-NEXT:    movdqu 16(%rdi), %xmm1
+; X64-SSE2-NEXT:    movdqu (%rsi), %xmm2
+; X64-SSE2-NEXT:    pcmpeqb %xmm0, %xmm2
+; X64-SSE2-NEXT:    movdqu 16(%rsi), %xmm0
+; X64-SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
+; X64-SSE2-NEXT:    pand %xmm2, %xmm0
+; X64-SSE2-NEXT:    pmovmskb %xmm0, %eax
+; X64-SSE2-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
+; X64-SSE2-NEXT:    sete %al
+; X64-SSE2-NEXT:    retq
+;
+; X64-SSE41-LABEL: length32_eq_prefer128:
+; X64-SSE41:       # %bb.0:
+; X64-SSE41-NEXT:    movdqu (%rdi), %xmm0
+; X64-SSE41-NEXT:    movdqu 16(%rdi), %xmm1
+; X64-SSE41-NEXT:    movdqu (%rsi), %xmm2
+; X64-SSE41-NEXT:    pxor %xmm0, %xmm2
+; X64-SSE41-NEXT:    movdqu 16(%rsi), %xmm0
+; X64-SSE41-NEXT:    pxor %xmm1, %xmm0
+; X64-SSE41-NEXT:    por %xmm2, %xmm0
+; X64-SSE41-NEXT:    ptest %xmm0, %xmm0
+; X64-SSE41-NEXT:    sete %al
+; X64-SSE41-NEXT:    retq
+;
+; X64-AVX-LABEL: length32_eq_prefer128:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    vmovdqu (%rdi), %xmm0
+; X64-AVX-NEXT:    vmovdqu 16(%rdi), %xmm1
+; X64-AVX-NEXT:    vpxor 16(%rsi), %xmm1, %xmm1
+; X64-AVX-NEXT:    vpxor (%rsi), %xmm0, %xmm0
+; X64-AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT:    vptest %xmm0, %xmm0
+; X64-AVX-NEXT:    sete %al
+; X64-AVX-NEXT:    retq
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 32) nounwind
+  %cmp = icmp eq i32 %call, 0
+  ret i1 %cmp
+}
+
+define i1 @length32_eq_const(i8* %X) nounwind {
+; X86-NOSSE-LABEL: length32_eq_const:
+; X86-NOSSE:       # %bb.0:
+; X86-NOSSE-NEXT:    pushl $0
+; X86-NOSSE-NEXT:    pushl $32
+; X86-NOSSE-NEXT:    pushl $.L.str
+; X86-NOSSE-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    calll memcmp
+; X86-NOSSE-NEXT:    addl $16, %esp
+; X86-NOSSE-NEXT:    testl %eax, %eax
+; X86-NOSSE-NEXT:    setne %al
+; X86-NOSSE-NEXT:    retl
+;
+; X86-SSE1-LABEL: length32_eq_const:
+; X86-SSE1:       # %bb.0:
+; X86-SSE1-NEXT:    pushl $0
+; X86-SSE1-NEXT:    pushl $32
+; X86-SSE1-NEXT:    pushl $.L.str
+; X86-SSE1-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT:    calll memcmp
+; X86-SSE1-NEXT:    addl $16, %esp
+; X86-SSE1-NEXT:    testl %eax, %eax
+; X86-SSE1-NEXT:    setne %al
+; X86-SSE1-NEXT:    retl
+;
+; X86-SSE2-LABEL: length32_eq_const:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT:    movdqu (%eax), %xmm0
+; X86-SSE2-NEXT:    movdqu 16(%eax), %xmm1
+; X86-SSE2-NEXT:    pcmpeqb {{\.LCPI.*}}, %xmm1
+; X86-SSE2-NEXT:    pcmpeqb {{\.LCPI.*}}, %xmm0
+; X86-SSE2-NEXT:    pand %xmm1, %xmm0
+; X86-SSE2-NEXT:    pmovmskb %xmm0, %eax
+; X86-SSE2-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
+; X86-SSE2-NEXT:    setne %al
+; X86-SSE2-NEXT:    retl
+;
+; X86-SSE41-LABEL: length32_eq_const:
+; X86-SSE41:       # %bb.0:
+; X86-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE41-NEXT:    movdqu (%eax), %xmm0
+; X86-SSE41-NEXT:    movdqu 16(%eax), %xmm1
+; X86-SSE41-NEXT:    pxor {{\.LCPI.*}}, %xmm1
+; X86-SSE41-NEXT:    pxor {{\.LCPI.*}}, %xmm0
+; X86-SSE41-NEXT:    por %xmm1, %xmm0
+; X86-SSE41-NEXT:    ptest %xmm0, %xmm0
+; X86-SSE41-NEXT:    setne %al
+; X86-SSE41-NEXT:    retl
+;
+; X64-SSE2-LABEL: length32_eq_const:
+; X64-SSE2:       # %bb.0:
+; X64-SSE2-NEXT:    movdqu (%rdi), %xmm0
+; X64-SSE2-NEXT:    movdqu 16(%rdi), %xmm1
+; X64-SSE2-NEXT:    pcmpeqb {{.*}}(%rip), %xmm1
+; X64-SSE2-NEXT:    pcmpeqb {{.*}}(%rip), %xmm0
+; X64-SSE2-NEXT:    pand %xmm1, %xmm0
+; X64-SSE2-NEXT:    pmovmskb %xmm0, %eax
+; X64-SSE2-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
+; X64-SSE2-NEXT:    setne %al
+; X64-SSE2-NEXT:    retq
+;
+; X64-SSE41-LABEL: length32_eq_const:
+; X64-SSE41:       # %bb.0:
+; X64-SSE41-NEXT:    movdqu (%rdi), %xmm0
+; X64-SSE41-NEXT:    movdqu 16(%rdi), %xmm1
+; X64-SSE41-NEXT:    pxor {{.*}}(%rip), %xmm1
+; X64-SSE41-NEXT:    pxor {{.*}}(%rip), %xmm0
+; X64-SSE41-NEXT:    por %xmm1, %xmm0
+; X64-SSE41-NEXT:    ptest %xmm0, %xmm0
+; X64-SSE41-NEXT:    setne %al
+; X64-SSE41-NEXT:    retq
+;
+; X64-AVX1-LABEL: length32_eq_const:
+; X64-AVX1:       # %bb.0:
+; X64-AVX1-NEXT:    vmovdqu (%rdi), %xmm0
+; X64-AVX1-NEXT:    vmovdqu 16(%rdi), %xmm1
+; X64-AVX1-NEXT:    vpxor {{.*}}(%rip), %xmm1, %xmm1
+; X64-AVX1-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT:    vptest %xmm0, %xmm0
+; X64-AVX1-NEXT:    setne %al
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX2-LABEL: length32_eq_const:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    vmovdqu (%rdi), %ymm0
+; X64-AVX2-NEXT:    vpxor {{.*}}(%rip), %ymm0, %ymm0
+; X64-AVX2-NEXT:    vptest %ymm0, %ymm0
+; X64-AVX2-NEXT:    setne %al
+; X64-AVX2-NEXT:    vzeroupper
+; X64-AVX2-NEXT:    retq
+;
+; X64-AVX512-LABEL: length32_eq_const:
+; X64-AVX512:       # %bb.0:
+; X64-AVX512-NEXT:    vmovdqu (%rdi), %ymm0
+; X64-AVX512-NEXT:    vpxor {{.*}}(%rip), %ymm0, %ymm0
+; X64-AVX512-NEXT:    vptest %ymm0, %ymm0
+; X64-AVX512-NEXT:    setne %al
+; X64-AVX512-NEXT:    vzeroupper
+; X64-AVX512-NEXT:    retq
+  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 32) nounwind
+  %c = icmp ne i32 %m, 0
+  ret i1 %c
+}
+
+define i32 @length48(i8* %X, i8* %Y) nounwind {
+; X86-LABEL: length48:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $48
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    retl
+;
+; X64-LABEL: length48:
+; X64:       # %bb.0:
+; X64-NEXT:    movl $48, %edx
+; X64-NEXT:    jmp memcmp # TAILCALL
+  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 48) nounwind
+  ret i32 %m
+}
+
+define i1 @length48_eq(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length48_eq:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $48
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    sete %al
+; X86-NEXT:    retl
+;
+; X64-SSE-LABEL: length48_eq:
+; X64-SSE:       # %bb.0:
+; X64-SSE-NEXT:    pushq %rax
+; X64-SSE-NEXT:    movl $48, %edx
+; X64-SSE-NEXT:    callq memcmp
+; X64-SSE-NEXT:    testl %eax, %eax
+; X64-SSE-NEXT:    sete %al
+; X64-SSE-NEXT:    popq %rcx
+; X64-SSE-NEXT:    retq
+;
+; X64-AVX1-LABEL: length48_eq:
+; X64-AVX1:       # %bb.0:
+; X64-AVX1-NEXT:    pushq %rax
+; X64-AVX1-NEXT:    movl $48, %edx
+; X64-AVX1-NEXT:    callq memcmp
+; X64-AVX1-NEXT:    testl %eax, %eax
+; X64-AVX1-NEXT:    sete %al
+; X64-AVX1-NEXT:    popq %rcx
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX2-LABEL: length48_eq:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    movq 32(%rdi), %rcx
+; X64-AVX2-NEXT:    movq %rcx, %rax
+; X64-AVX2-NEXT:    movl %ecx, %edx
+; X64-AVX2-NEXT:    shrl $8, %edx
+; X64-AVX2-NEXT:    vmovd %ecx, %xmm0
+; X64-AVX2-NEXT:    vpinsrb $1, %edx, %xmm0, %xmm0
+; X64-AVX2-NEXT:    movl %ecx, %edx
+; X64-AVX2-NEXT:    shrl $16, %edx
+; X64-AVX2-NEXT:    vpinsrb $2, %edx, %xmm0, %xmm0
+; X64-AVX2-NEXT:    movl %ecx, %edx
+; X64-AVX2-NEXT:    shrl $24, %edx
+; X64-AVX2-NEXT:    vpinsrb $3, %edx, %xmm0, %xmm0
+; X64-AVX2-NEXT:    movq %rcx, %rdx
+; X64-AVX2-NEXT:    shrq $32, %rdx
+; X64-AVX2-NEXT:    vpinsrb $4, %edx, %xmm0, %xmm0
+; X64-AVX2-NEXT:    movq %rcx, %rdx
+; X64-AVX2-NEXT:    shrq $40, %rcx
+; X64-AVX2-NEXT:    vpinsrb $5, %ecx, %xmm0, %xmm1
+; X64-AVX2-NEXT:    vmovdqu (%rdi), %ymm0
+; X64-AVX2-NEXT:    movq 40(%rdi), %rcx
+; X64-AVX2-NEXT:    shrq $48, %rdx
+; X64-AVX2-NEXT:    vpinsrb $6, %edx, %xmm1, %xmm1
+; X64-AVX2-NEXT:    movq %rcx, %rdx
+; X64-AVX2-NEXT:    shrq $56, %rdx
+; X64-AVX2-NEXT:    shrq $56, %rax
+; X64-AVX2-NEXT:    vpinsrb $7, %eax, %xmm1, %xmm1
+; X64-AVX2-NEXT:    movl %ecx, %eax
+; X64-AVX2-NEXT:    shrl $8, %eax
+; X64-AVX2-NEXT:    vpinsrb $8, %ecx, %xmm1, %xmm1
+; X64-AVX2-NEXT:    vpinsrb $9, %eax, %xmm1, %xmm1
+; X64-AVX2-NEXT:    movl %ecx, %eax
+; X64-AVX2-NEXT:    shrl $16, %eax
+; X64-AVX2-NEXT:    vpinsrb $10, %eax, %xmm1, %xmm1
+; X64-AVX2-NEXT:    movl %ecx, %eax
+; X64-AVX2-NEXT:    shrl $24, %eax
+; X64-AVX2-NEXT:    vpinsrb $11, %eax, %xmm1, %xmm1
+; X64-AVX2-NEXT:    movq %rcx, %rax
+; X64-AVX2-NEXT:    shrq $32, %rax
+; X64-AVX2-NEXT:    vpinsrb $12, %eax, %xmm1, %xmm1
+; X64-AVX2-NEXT:    movq %rcx, %rax
+; X64-AVX2-NEXT:    shrq $48, %rax
+; X64-AVX2-NEXT:    shrq $40, %rcx
+; X64-AVX2-NEXT:    vpinsrb $13, %ecx, %xmm1, %xmm1
+; X64-AVX2-NEXT:    movq 32(%rsi), %rcx
+; X64-AVX2-NEXT:    vpinsrb $14, %eax, %xmm1, %xmm1
+; X64-AVX2-NEXT:    movq %rcx, %rax
+; X64-AVX2-NEXT:    vpinsrb $15, %edx, %xmm1, %xmm1
+; X64-AVX2-NEXT:    movl %ecx, %edx
+; X64-AVX2-NEXT:    shrl $8, %edx
+; X64-AVX2-NEXT:    vmovd %ecx, %xmm2
+; X64-AVX2-NEXT:    vpinsrb $1, %edx, %xmm2, %xmm2
+; X64-AVX2-NEXT:    movl %ecx, %edx
+; X64-AVX2-NEXT:    shrl $16, %edx
+; X64-AVX2-NEXT:    vpinsrb $2, %edx, %xmm2, %xmm2
+; X64-AVX2-NEXT:    movl %ecx, %edx
+; X64-AVX2-NEXT:    shrl $24, %edx
+; X64-AVX2-NEXT:    vpinsrb $3, %edx, %xmm2, %xmm2
+; X64-AVX2-NEXT:    movq %rcx, %rdx
+; X64-AVX2-NEXT:    shrq $32, %rdx
+; X64-AVX2-NEXT:    vpinsrb $4, %edx, %xmm2, %xmm2
+; X64-AVX2-NEXT:    movq %rcx, %rdx
+; X64-AVX2-NEXT:    shrq $40, %rcx
+; X64-AVX2-NEXT:    vpinsrb $5, %ecx, %xmm2, %xmm2
+; X64-AVX2-NEXT:    movq 40(%rsi), %rcx
+; X64-AVX2-NEXT:    shrq $48, %rdx
+; X64-AVX2-NEXT:    vpinsrb $6, %edx, %xmm2, %xmm2
+; X64-AVX2-NEXT:    movq %rcx, %rdx
+; X64-AVX2-NEXT:    shrq $56, %rax
+; X64-AVX2-NEXT:    vpinsrb $7, %eax, %xmm2, %xmm2
+; X64-AVX2-NEXT:    movl %ecx, %eax
+; X64-AVX2-NEXT:    shrl $8, %eax
+; X64-AVX2-NEXT:    vpinsrb $8, %ecx, %xmm2, %xmm2
+; X64-AVX2-NEXT:    vpinsrb $9, %eax, %xmm2, %xmm2
+; X64-AVX2-NEXT:    movl %ecx, %eax
+; X64-AVX2-NEXT:    shrl $16, %eax
+; X64-AVX2-NEXT:    vpinsrb $10, %eax, %xmm2, %xmm2
+; X64-AVX2-NEXT:    movl %ecx, %eax
+; X64-AVX2-NEXT:    shrl $24, %eax
+; X64-AVX2-NEXT:    vpinsrb $11, %eax, %xmm2, %xmm2
+; X64-AVX2-NEXT:    movq %rcx, %rax
+; X64-AVX2-NEXT:    shrq $32, %rax
+; X64-AVX2-NEXT:    vpinsrb $12, %eax, %xmm2, %xmm2
+; X64-AVX2-NEXT:    movq %rcx, %rax
+; X64-AVX2-NEXT:    shrq $40, %rcx
+; X64-AVX2-NEXT:    vpinsrb $13, %ecx, %xmm2, %xmm2
+; X64-AVX2-NEXT:    shrq $48, %rax
+; X64-AVX2-NEXT:    vpinsrb $14, %eax, %xmm2, %xmm2
+; X64-AVX2-NEXT:    shrq $56, %rdx
+; X64-AVX2-NEXT:    vpinsrb $15, %edx, %xmm2, %xmm2
+; X64-AVX2-NEXT:    vpxor (%rsi), %ymm0, %ymm0
+; X64-AVX2-NEXT:    vpxor %ymm2, %ymm1, %ymm1
+; X64-AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT:    vptest %ymm0, %ymm0
+; X64-AVX2-NEXT:    sete %al
+; X64-AVX2-NEXT:    vzeroupper
+; X64-AVX2-NEXT:    retq
+;
+; X64-AVX512-LABEL: length48_eq:
+; X64-AVX512:       # %bb.0:
+; X64-AVX512-NEXT:    movq 32(%rdi), %rcx
+; X64-AVX512-NEXT:    movq %rcx, %rax
+; X64-AVX512-NEXT:    movl %ecx, %edx
+; X64-AVX512-NEXT:    shrl $8, %edx
+; X64-AVX512-NEXT:    vmovd %ecx, %xmm0
+; X64-AVX512-NEXT:    vpinsrb $1, %edx, %xmm0, %xmm0
+; X64-AVX512-NEXT:    movl %ecx, %edx
+; X64-AVX512-NEXT:    shrl $16, %edx
+; X64-AVX512-NEXT:    vpinsrb $2, %edx, %xmm0, %xmm0
+; X64-AVX512-NEXT:    movl %ecx, %edx
+; X64-AVX512-NEXT:    shrl $24, %edx
+; X64-AVX512-NEXT:    vpinsrb $3, %edx, %xmm0, %xmm0
+; X64-AVX512-NEXT:    movq %rcx, %rdx
+; X64-AVX512-NEXT:    shrq $32, %rdx
+; X64-AVX512-NEXT:    vpinsrb $4, %edx, %xmm0, %xmm0
+; X64-AVX512-NEXT:    movq %rcx, %rdx
+; X64-AVX512-NEXT:    shrq $40, %rcx
+; X64-AVX512-NEXT:    vpinsrb $5, %ecx, %xmm0, %xmm1
+; X64-AVX512-NEXT:    vmovdqu (%rdi), %ymm0
+; X64-AVX512-NEXT:    movq 40(%rdi), %rcx
+; X64-AVX512-NEXT:    shrq $48, %rdx
+; X64-AVX512-NEXT:    vpinsrb $6, %edx, %xmm1, %xmm1
+; X64-AVX512-NEXT:    movq %rcx, %rdx
+; X64-AVX512-NEXT:    shrq $56, %rdx
+; X64-AVX512-NEXT:    shrq $56, %rax
+; X64-AVX512-NEXT:    vpinsrb $7, %eax, %xmm1, %xmm1
+; X64-AVX512-NEXT:    movl %ecx, %eax
+; X64-AVX512-NEXT:    shrl $8, %eax
+; X64-AVX512-NEXT:    vpinsrb $8, %ecx, %xmm1, %xmm1
+; X64-AVX512-NEXT:    vpinsrb $9, %eax, %xmm1, %xmm1
+; X64-AVX512-NEXT:    movl %ecx, %eax
+; X64-AVX512-NEXT:    shrl $16, %eax
+; X64-AVX512-NEXT:    vpinsrb $10, %eax, %xmm1, %xmm1
+; X64-AVX512-NEXT:    movl %ecx, %eax
+; X64-AVX512-NEXT:    shrl $24, %eax
+; X64-AVX512-NEXT:    vpinsrb $11, %eax, %xmm1, %xmm1
+; X64-AVX512-NEXT:    movq %rcx, %rax
+; X64-AVX512-NEXT:    shrq $32, %rax
+; X64-AVX512-NEXT:    vpinsrb $12, %eax, %xmm1, %xmm1
+; X64-AVX512-NEXT:    movq %rcx, %rax
+; X64-AVX512-NEXT:    shrq $48, %rax
+; X64-AVX512-NEXT:    shrq $40, %rcx
+; X64-AVX512-NEXT:    vpinsrb $13, %ecx, %xmm1, %xmm1
+; X64-AVX512-NEXT:    movq 32(%rsi), %rcx
+; X64-AVX512-NEXT:    vpinsrb $14, %eax, %xmm1, %xmm1
+; X64-AVX512-NEXT:    movq %rcx, %rax
+; X64-AVX512-NEXT:    vpinsrb $15, %edx, %xmm1, %xmm1
+; X64-AVX512-NEXT:    movl %ecx, %edx
+; X64-AVX512-NEXT:    shrl $8, %edx
+; X64-AVX512-NEXT:    vmovd %ecx, %xmm2
+; X64-AVX512-NEXT:    vpinsrb $1, %edx, %xmm2, %xmm2
+; X64-AVX512-NEXT:    movl %ecx, %edx
+; X64-AVX512-NEXT:    shrl $16, %edx
+; X64-AVX512-NEXT:    vpinsrb $2, %edx, %xmm2, %xmm2
+; X64-AVX512-NEXT:    movl %ecx, %edx
+; X64-AVX512-NEXT:    shrl $24, %edx
+; X64-AVX512-NEXT:    vpinsrb $3, %edx, %xmm2, %xmm2
+; X64-AVX512-NEXT:    movq %rcx, %rdx
+; X64-AVX512-NEXT:    shrq $32, %rdx
+; X64-AVX512-NEXT:    vpinsrb $4, %edx, %xmm2, %xmm2
+; X64-AVX512-NEXT:    movq %rcx, %rdx
+; X64-AVX512-NEXT:    shrq $40, %rcx
+; X64-AVX512-NEXT:    vpinsrb $5, %ecx, %xmm2, %xmm2
+; X64-AVX512-NEXT:    movq 40(%rsi), %rcx
+; X64-AVX512-NEXT:    shrq $48, %rdx
+; X64-AVX512-NEXT:    vpinsrb $6, %edx, %xmm2, %xmm2
+; X64-AVX512-NEXT:    movq %rcx, %rdx
+; X64-AVX512-NEXT:    shrq $56, %rax
+; X64-AVX512-NEXT:    vpinsrb $7, %eax, %xmm2, %xmm2
+; X64-AVX512-NEXT:    movl %ecx, %eax
+; X64-AVX512-NEXT:    shrl $8, %eax
+; X64-AVX512-NEXT:    vpinsrb $8, %ecx, %xmm2, %xmm2
+; X64-AVX512-NEXT:    vpinsrb $9, %eax, %xmm2, %xmm2
+; X64-AVX512-NEXT:    movl %ecx, %eax
+; X64-AVX512-NEXT:    shrl $16, %eax
+; X64-AVX512-NEXT:    vpinsrb $10, %eax, %xmm2, %xmm2
+; X64-AVX512-NEXT:    movl %ecx, %eax
+; X64-AVX512-NEXT:    shrl $24, %eax
+; X64-AVX512-NEXT:    vpinsrb $11, %eax, %xmm2, %xmm2
+; X64-AVX512-NEXT:    movq %rcx, %rax
+; X64-AVX512-NEXT:    shrq $32, %rax
+; X64-AVX512-NEXT:    vpinsrb $12, %eax, %xmm2, %xmm2
+; X64-AVX512-NEXT:    movq %rcx, %rax
+; X64-AVX512-NEXT:    shrq $40, %rcx
+; X64-AVX512-NEXT:    vpinsrb $13, %ecx, %xmm2, %xmm2
+; X64-AVX512-NEXT:    shrq $48, %rax
+; X64-AVX512-NEXT:    vpinsrb $14, %eax, %xmm2, %xmm2
+; X64-AVX512-NEXT:    shrq $56, %rdx
+; X64-AVX512-NEXT:    vpinsrb $15, %edx, %xmm2, %xmm2
+; X64-AVX512-NEXT:    vpxor (%rsi), %ymm0, %ymm0
+; X64-AVX512-NEXT:    vpxor %ymm2, %ymm1, %ymm1
+; X64-AVX512-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT:    vptest %ymm0, %ymm0
+; X64-AVX512-NEXT:    sete %al
+; X64-AVX512-NEXT:    vzeroupper
+; X64-AVX512-NEXT:    retq
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 48) nounwind
+  %cmp = icmp eq i32 %call, 0
+  ret i1 %cmp
+}
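+; Note: the AVX512 expansion above loads the first 32 bytes as one ymm but
+; rebuilds the trailing 16 bytes of each operand byte-by-byte with vpinsrb,
+; which is why this equality test is so long; the compare itself is still
+; just xor/or/vptest on ymm registers.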
+
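+; Note: the ordered (lt/gt) comparisons below are not expanded inline at any
+; width tested here; every target calls memcmp. The slt result is the sign
+; bit, extracted with `shrl $31`, while sgt uses `testl` plus `setg`.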
+define i1 @length48_lt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length48_lt:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $48
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    shrl $31, %eax
+; X86-NEXT:    # kill: def $al killed $al killed $eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: length48_lt:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movl $48, %edx
+; X64-NEXT:    callq memcmp
+; X64-NEXT:    shrl $31, %eax
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 48) nounwind
+  %cmp = icmp slt i32 %call, 0
+  ret i1 %cmp
+}
+
+define i1 @length48_gt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length48_gt:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $48
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    setg %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: length48_gt:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movl $48, %edx
+; X64-NEXT:    callq memcmp
+; X64-NEXT:    testl %eax, %eax
+; X64-NEXT:    setg %al
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 48) nounwind
+  %cmp = icmp sgt i32 %call, 0
+  ret i1 %cmp
+}
+
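+; Note: with "prefer-vector-width"="128" the 256-bit expansion is off the
+; table, and 48 bytes apparently exceeds the 128-bit inline budget, so every
+; configuration below degrades to the libcall.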
+define i1 @length48_eq_prefer128(i8* %x, i8* %y) nounwind "prefer-vector-width"="128" {
+; X86-LABEL: length48_eq_prefer128:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $48
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    sete %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: length48_eq_prefer128:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movl $48, %edx
+; X64-NEXT:    callq memcmp
+; X64-NEXT:    testl %eax, %eax
+; X64-NEXT:    sete %al
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 48) nounwind
+  %cmp = icmp eq i32 %call, 0
+  ret i1 %cmp
+}
+
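+; Note: when the second operand is the constant @.str, the AVX targets that
+; expand inline fold the constant directly into the vpxor memory operands
+; ({{.*}}(%rip)) rather than materializing it first.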
+define i1 @length48_eq_const(i8* %X) nounwind {
+; X86-LABEL: length48_eq_const:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $48
+; X86-NEXT:    pushl $.L.str
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    setne %al
+; X86-NEXT:    retl
+;
+; X64-SSE-LABEL: length48_eq_const:
+; X64-SSE:       # %bb.0:
+; X64-SSE-NEXT:    pushq %rax
+; X64-SSE-NEXT:    movl $.L.str, %esi
+; X64-SSE-NEXT:    movl $48, %edx
+; X64-SSE-NEXT:    callq memcmp
+; X64-SSE-NEXT:    testl %eax, %eax
+; X64-SSE-NEXT:    setne %al
+; X64-SSE-NEXT:    popq %rcx
+; X64-SSE-NEXT:    retq
+;
+; X64-AVX1-LABEL: length48_eq_const:
+; X64-AVX1:       # %bb.0:
+; X64-AVX1-NEXT:    pushq %rax
+; X64-AVX1-NEXT:    movl $.L.str, %esi
+; X64-AVX1-NEXT:    movl $48, %edx
+; X64-AVX1-NEXT:    callq memcmp
+; X64-AVX1-NEXT:    testl %eax, %eax
+; X64-AVX1-NEXT:    setne %al
+; X64-AVX1-NEXT:    popq %rcx
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX2-LABEL: length48_eq_const:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    pushq %rbp
+; X64-AVX2-NEXT:    pushq %r15
+; X64-AVX2-NEXT:    pushq %r14
+; X64-AVX2-NEXT:    pushq %r12
+; X64-AVX2-NEXT:    pushq %rbx
+; X64-AVX2-NEXT:    vmovdqu (%rdi), %ymm0
+; X64-AVX2-NEXT:    movq 40(%rdi), %rcx
+; X64-AVX2-NEXT:    movq %rcx, %r8
+; X64-AVX2-NEXT:    shrq $56, %r8
+; X64-AVX2-NEXT:    movq %rcx, %r9
+; X64-AVX2-NEXT:    shrq $48, %r9
+; X64-AVX2-NEXT:    movq %rcx, %r10
+; X64-AVX2-NEXT:    shrq $32, %r10
+; X64-AVX2-NEXT:    movl %ecx, %r11d
+; X64-AVX2-NEXT:    shrl $24, %r11d
+; X64-AVX2-NEXT:    movl %ecx, %r14d
+; X64-AVX2-NEXT:    shrl $16, %r14d
+; X64-AVX2-NEXT:    movl %ecx, %r15d
+; X64-AVX2-NEXT:    shrl $8, %r15d
+; X64-AVX2-NEXT:    movq 32(%rdi), %rdi
+; X64-AVX2-NEXT:    movq %rdi, %r12
+; X64-AVX2-NEXT:    shrq $56, %r12
+; X64-AVX2-NEXT:    movq %rdi, %rbx
+; X64-AVX2-NEXT:    shrq $48, %rbx
+; X64-AVX2-NEXT:    movq %rdi, %rdx
+; X64-AVX2-NEXT:    shrq $32, %rdx
+; X64-AVX2-NEXT:    movl %edi, %ebp
+; X64-AVX2-NEXT:    shrl $24, %ebp
+; X64-AVX2-NEXT:    movl %edi, %esi
+; X64-AVX2-NEXT:    shrl $16, %esi
+; X64-AVX2-NEXT:    vmovd %edi, %xmm1
+; X64-AVX2-NEXT:    movl %edi, %eax
+; X64-AVX2-NEXT:    shrl $8, %eax
+; X64-AVX2-NEXT:    vpinsrb $1, %eax, %xmm1, %xmm1
+; X64-AVX2-NEXT:    vpinsrb $2, %esi, %xmm1, %xmm1
+; X64-AVX2-NEXT:    vpinsrb $3, %ebp, %xmm1, %xmm1
+; X64-AVX2-NEXT:    vpinsrb $4, %edx, %xmm1, %xmm1
+; X64-AVX2-NEXT:    shrq $40, %rdi
+; X64-AVX2-NEXT:    vpinsrb $5, %edi, %xmm1, %xmm1
+; X64-AVX2-NEXT:    vpinsrb $6, %ebx, %xmm1, %xmm1
+; X64-AVX2-NEXT:    vpinsrb $7, %r12d, %xmm1, %xmm1
+; X64-AVX2-NEXT:    vpinsrb $8, %ecx, %xmm1, %xmm1
+; X64-AVX2-NEXT:    vpinsrb $9, %r15d, %xmm1, %xmm1
+; X64-AVX2-NEXT:    vpinsrb $10, %r14d, %xmm1, %xmm1
+; X64-AVX2-NEXT:    vpinsrb $11, %r11d, %xmm1, %xmm1
+; X64-AVX2-NEXT:    vpinsrb $12, %r10d, %xmm1, %xmm1
+; X64-AVX2-NEXT:    shrq $40, %rcx
+; X64-AVX2-NEXT:    vpinsrb $13, %ecx, %xmm1, %xmm1
+; X64-AVX2-NEXT:    vpinsrb $14, %r9d, %xmm1, %xmm1
+; X64-AVX2-NEXT:    vpinsrb $15, %r8d, %xmm1, %xmm1
+; X64-AVX2-NEXT:    vpxor {{.*}}(%rip), %ymm1, %ymm1
+; X64-AVX2-NEXT:    vpxor {{.*}}(%rip), %ymm0, %ymm0
+; X64-AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT:    vptest %ymm0, %ymm0
+; X64-AVX2-NEXT:    setne %al
+; X64-AVX2-NEXT:    popq %rbx
+; X64-AVX2-NEXT:    popq %r12
+; X64-AVX2-NEXT:    popq %r14
+; X64-AVX2-NEXT:    popq %r15
+; X64-AVX2-NEXT:    popq %rbp
+; X64-AVX2-NEXT:    vzeroupper
+; X64-AVX2-NEXT:    retq
+;
+; X64-AVX512-LABEL: length48_eq_const:
+; X64-AVX512:       # %bb.0:
+; X64-AVX512-NEXT:    pushq %rbp
+; X64-AVX512-NEXT:    pushq %r15
+; X64-AVX512-NEXT:    pushq %r14
+; X64-AVX512-NEXT:    pushq %r12
+; X64-AVX512-NEXT:    pushq %rbx
+; X64-AVX512-NEXT:    vmovdqu (%rdi), %ymm0
+; X64-AVX512-NEXT:    movq 40(%rdi), %rcx
+; X64-AVX512-NEXT:    movq %rcx, %r8
+; X64-AVX512-NEXT:    shrq $56, %r8
+; X64-AVX512-NEXT:    movq %rcx, %r9
+; X64-AVX512-NEXT:    shrq $48, %r9
+; X64-AVX512-NEXT:    movq %rcx, %r10
+; X64-AVX512-NEXT:    shrq $32, %r10
+; X64-AVX512-NEXT:    movl %ecx, %r11d
+; X64-AVX512-NEXT:    shrl $24, %r11d
+; X64-AVX512-NEXT:    movl %ecx, %r14d
+; X64-AVX512-NEXT:    shrl $16, %r14d
+; X64-AVX512-NEXT:    movl %ecx, %r15d
+; X64-AVX512-NEXT:    shrl $8, %r15d
+; X64-AVX512-NEXT:    movq 32(%rdi), %rdi
+; X64-AVX512-NEXT:    movq %rdi, %r12
+; X64-AVX512-NEXT:    shrq $56, %r12
+; X64-AVX512-NEXT:    movq %rdi, %rbx
+; X64-AVX512-NEXT:    shrq $48, %rbx
+; X64-AVX512-NEXT:    movq %rdi, %rdx
+; X64-AVX512-NEXT:    shrq $32, %rdx
+; X64-AVX512-NEXT:    movl %edi, %ebp
+; X64-AVX512-NEXT:    shrl $24, %ebp
+; X64-AVX512-NEXT:    movl %edi, %esi
+; X64-AVX512-NEXT:    shrl $16, %esi
+; X64-AVX512-NEXT:    vmovd %edi, %xmm1
+; X64-AVX512-NEXT:    movl %edi, %eax
+; X64-AVX512-NEXT:    shrl $8, %eax
+; X64-AVX512-NEXT:    vpinsrb $1, %eax, %xmm1, %xmm1
+; X64-AVX512-NEXT:    vpinsrb $2, %esi, %xmm1, %xmm1
+; X64-AVX512-NEXT:    vpinsrb $3, %ebp, %xmm1, %xmm1
+; X64-AVX512-NEXT:    vpinsrb $4, %edx, %xmm1, %xmm1
+; X64-AVX512-NEXT:    shrq $40, %rdi
+; X64-AVX512-NEXT:    vpinsrb $5, %edi, %xmm1, %xmm1
+; X64-AVX512-NEXT:    vpinsrb $6, %ebx, %xmm1, %xmm1
+; X64-AVX512-NEXT:    vpinsrb $7, %r12d, %xmm1, %xmm1
+; X64-AVX512-NEXT:    vpinsrb $8, %ecx, %xmm1, %xmm1
+; X64-AVX512-NEXT:    vpinsrb $9, %r15d, %xmm1, %xmm1
+; X64-AVX512-NEXT:    vpinsrb $10, %r14d, %xmm1, %xmm1
+; X64-AVX512-NEXT:    vpinsrb $11, %r11d, %xmm1, %xmm1
+; X64-AVX512-NEXT:    vpinsrb $12, %r10d, %xmm1, %xmm1
+; X64-AVX512-NEXT:    shrq $40, %rcx
+; X64-AVX512-NEXT:    vpinsrb $13, %ecx, %xmm1, %xmm1
+; X64-AVX512-NEXT:    vpinsrb $14, %r9d, %xmm1, %xmm1
+; X64-AVX512-NEXT:    vpinsrb $15, %r8d, %xmm1, %xmm1
+; X64-AVX512-NEXT:    vpxor {{.*}}(%rip), %ymm1, %ymm1
+; X64-AVX512-NEXT:    vpxor {{.*}}(%rip), %ymm0, %ymm0
+; X64-AVX512-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT:    vptest %ymm0, %ymm0
+; X64-AVX512-NEXT:    setne %al
+; X64-AVX512-NEXT:    popq %rbx
+; X64-AVX512-NEXT:    popq %r12
+; X64-AVX512-NEXT:    popq %r14
+; X64-AVX512-NEXT:    popq %r15
+; X64-AVX512-NEXT:    popq %rbp
+; X64-AVX512-NEXT:    vzeroupper
+; X64-AVX512-NEXT:    retq
+  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 48) nounwind
+  %c = icmp ne i32 %m, 0
+  ret i1 %c
+}
+
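+; Note: the full-result (i32) tests at these sizes are not expanded; on
+; x86-64 they reduce to a bare tail call (`jmp memcmp`).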
+define i32 @length63(i8* %X, i8* %Y) nounwind {
+; X86-LABEL: length63:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $63
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    retl
+;
+; X64-LABEL: length63:
+; X64:       # %bb.0:
+; X64-NEXT:    movl $63, %edx
+; X64-NEXT:    jmp memcmp # TAILCALL
+  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 63) nounwind
+  ret i32 %m
+}
+
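+; Note: 63 bytes is handled with two overlapping 32-byte loads at offsets 0
+; and 31, so the odd size needs no scalar tail.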
+define i1 @length63_eq(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length63_eq:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $63
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    setne %al
+; X86-NEXT:    retl
+;
+; X64-SSE-LABEL: length63_eq:
+; X64-SSE:       # %bb.0:
+; X64-SSE-NEXT:    pushq %rax
+; X64-SSE-NEXT:    movl $63, %edx
+; X64-SSE-NEXT:    callq memcmp
+; X64-SSE-NEXT:    testl %eax, %eax
+; X64-SSE-NEXT:    setne %al
+; X64-SSE-NEXT:    popq %rcx
+; X64-SSE-NEXT:    retq
+;
+; X64-AVX1-LABEL: length63_eq:
+; X64-AVX1:       # %bb.0:
+; X64-AVX1-NEXT:    pushq %rax
+; X64-AVX1-NEXT:    movl $63, %edx
+; X64-AVX1-NEXT:    callq memcmp
+; X64-AVX1-NEXT:    testl %eax, %eax
+; X64-AVX1-NEXT:    setne %al
+; X64-AVX1-NEXT:    popq %rcx
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX2-LABEL: length63_eq:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    vmovdqu (%rdi), %ymm0
+; X64-AVX2-NEXT:    vmovdqu 31(%rdi), %ymm1
+; X64-AVX2-NEXT:    vpxor 31(%rsi), %ymm1, %ymm1
+; X64-AVX2-NEXT:    vpxor (%rsi), %ymm0, %ymm0
+; X64-AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT:    vptest %ymm0, %ymm0
+; X64-AVX2-NEXT:    setne %al
+; X64-AVX2-NEXT:    vzeroupper
+; X64-AVX2-NEXT:    retq
+;
+; X64-AVX512-LABEL: length63_eq:
+; X64-AVX512:       # %bb.0:
+; X64-AVX512-NEXT:    vmovdqu (%rdi), %ymm0
+; X64-AVX512-NEXT:    vmovdqu 31(%rdi), %ymm1
+; X64-AVX512-NEXT:    vpxor 31(%rsi), %ymm1, %ymm1
+; X64-AVX512-NEXT:    vpxor (%rsi), %ymm0, %ymm0
+; X64-AVX512-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT:    vptest %ymm0, %ymm0
+; X64-AVX512-NEXT:    setne %al
+; X64-AVX512-NEXT:    vzeroupper
+; X64-AVX512-NEXT:    retq
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 63) nounwind
+  %cmp = icmp ne i32 %call, 0
+  ret i1 %cmp
+}
+
+define i1 @length63_lt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length63_lt:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $63
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    shrl $31, %eax
+; X86-NEXT:    # kill: def $al killed $al killed $eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: length63_lt:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movl $63, %edx
+; X64-NEXT:    callq memcmp
+; X64-NEXT:    shrl $31, %eax
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 63) nounwind
+  %cmp = icmp slt i32 %call, 0
+  ret i1 %cmp
+}
+
+define i1 @length63_gt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length63_gt:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $63
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    setg %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: length63_gt:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movl $63, %edx
+; X64-NEXT:    callq memcmp
+; X64-NEXT:    testl %eax, %eax
+; X64-NEXT:    setg %al
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 63) nounwind
+  %cmp = icmp sgt i32 %call, 0
+  ret i1 %cmp
+}
+
+define i1 @length63_eq_const(i8* %X) nounwind {
+; X86-LABEL: length63_eq_const:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $63
+; X86-NEXT:    pushl $.L.str
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    sete %al
+; X86-NEXT:    retl
+;
+; X64-SSE-LABEL: length63_eq_const:
+; X64-SSE:       # %bb.0:
+; X64-SSE-NEXT:    pushq %rax
+; X64-SSE-NEXT:    movl $.L.str, %esi
+; X64-SSE-NEXT:    movl $63, %edx
+; X64-SSE-NEXT:    callq memcmp
+; X64-SSE-NEXT:    testl %eax, %eax
+; X64-SSE-NEXT:    sete %al
+; X64-SSE-NEXT:    popq %rcx
+; X64-SSE-NEXT:    retq
+;
+; X64-AVX1-LABEL: length63_eq_const:
+; X64-AVX1:       # %bb.0:
+; X64-AVX1-NEXT:    pushq %rax
+; X64-AVX1-NEXT:    movl $.L.str, %esi
+; X64-AVX1-NEXT:    movl $63, %edx
+; X64-AVX1-NEXT:    callq memcmp
+; X64-AVX1-NEXT:    testl %eax, %eax
+; X64-AVX1-NEXT:    sete %al
+; X64-AVX1-NEXT:    popq %rcx
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX2-LABEL: length63_eq_const:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    vmovdqu (%rdi), %ymm0
+; X64-AVX2-NEXT:    vmovdqu 31(%rdi), %ymm1
+; X64-AVX2-NEXT:    vpxor {{.*}}(%rip), %ymm1, %ymm1
+; X64-AVX2-NEXT:    vpxor {{.*}}(%rip), %ymm0, %ymm0
+; X64-AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT:    vptest %ymm0, %ymm0
+; X64-AVX2-NEXT:    sete %al
+; X64-AVX2-NEXT:    vzeroupper
+; X64-AVX2-NEXT:    retq
+;
+; X64-AVX512-LABEL: length63_eq_const:
+; X64-AVX512:       # %bb.0:
+; X64-AVX512-NEXT:    vmovdqu (%rdi), %ymm0
+; X64-AVX512-NEXT:    vmovdqu 31(%rdi), %ymm1
+; X64-AVX512-NEXT:    vpxor {{.*}}(%rip), %ymm1, %ymm1
+; X64-AVX512-NEXT:    vpxor {{.*}}(%rip), %ymm0, %ymm0
+; X64-AVX512-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT:    vptest %ymm0, %ymm0
+; X64-AVX512-NEXT:    sete %al
+; X64-AVX512-NEXT:    vzeroupper
+; X64-AVX512-NEXT:    retq
+  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 63) nounwind
+  %c = icmp eq i32 %m, 0
+  ret i1 %c
+}
+
+define i32 @length64(i8* %X, i8* %Y) nounwind {
+; X86-LABEL: length64:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $64
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    retl
+;
+; X64-LABEL: length64:
+; X64:       # %bb.0:
+; X64-NEXT:    movl $64, %edx
+; X64-NEXT:    jmp memcmp # TAILCALL
+  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 64) nounwind
+  ret i32 %m
+}
+
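+; Note: at exactly 64 bytes AVX2 still uses two ymm loads, while the AVX512
+; targets compare a single zmm; kortest sets CF only when all lanes are
+; equal, so setae materializes the "not equal" result.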
+define i1 @length64_eq(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length64_eq:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $64
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    setne %al
+; X86-NEXT:    retl
+;
+; X64-SSE-LABEL: length64_eq:
+; X64-SSE:       # %bb.0:
+; X64-SSE-NEXT:    pushq %rax
+; X64-SSE-NEXT:    movl $64, %edx
+; X64-SSE-NEXT:    callq memcmp
+; X64-SSE-NEXT:    testl %eax, %eax
+; X64-SSE-NEXT:    setne %al
+; X64-SSE-NEXT:    popq %rcx
+; X64-SSE-NEXT:    retq
+;
+; X64-AVX1-LABEL: length64_eq:
+; X64-AVX1:       # %bb.0:
+; X64-AVX1-NEXT:    pushq %rax
+; X64-AVX1-NEXT:    movl $64, %edx
+; X64-AVX1-NEXT:    callq memcmp
+; X64-AVX1-NEXT:    testl %eax, %eax
+; X64-AVX1-NEXT:    setne %al
+; X64-AVX1-NEXT:    popq %rcx
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX2-LABEL: length64_eq:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    vmovdqu (%rdi), %ymm0
+; X64-AVX2-NEXT:    vmovdqu 32(%rdi), %ymm1
+; X64-AVX2-NEXT:    vpxor 32(%rsi), %ymm1, %ymm1
+; X64-AVX2-NEXT:    vpxor (%rsi), %ymm0, %ymm0
+; X64-AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT:    vptest %ymm0, %ymm0
+; X64-AVX2-NEXT:    setne %al
+; X64-AVX2-NEXT:    vzeroupper
+; X64-AVX2-NEXT:    retq
+;
+; X64-AVX512F-LABEL: length64_eq:
+; X64-AVX512F:       # %bb.0:
+; X64-AVX512F-NEXT:    vmovdqu64 (%rdi), %zmm0
+; X64-AVX512F-NEXT:    vpcmpeqd (%rsi), %zmm0, %k0
+; X64-AVX512F-NEXT:    kortestw %k0, %k0
+; X64-AVX512F-NEXT:    setae %al
+; X64-AVX512F-NEXT:    vzeroupper
+; X64-AVX512F-NEXT:    retq
+;
+; X64-AVX512BW-LABEL: length64_eq:
+; X64-AVX512BW:       # %bb.0:
+; X64-AVX512BW-NEXT:    vmovdqu64 (%rdi), %zmm0
+; X64-AVX512BW-NEXT:    vpcmpeqb (%rsi), %zmm0, %k0
+; X64-AVX512BW-NEXT:    kortestq %k0, %k0
+; X64-AVX512BW-NEXT:    setae %al
+; X64-AVX512BW-NEXT:    vzeroupper
+; X64-AVX512BW-NEXT:    retq
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 64) nounwind
+  %cmp = icmp ne i32 %call, 0
+  ret i1 %cmp
+}
+
+define i1 @length64_lt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length64_lt:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $64
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    shrl $31, %eax
+; X86-NEXT:    # kill: def $al killed $al killed $eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: length64_lt:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movl $64, %edx
+; X64-NEXT:    callq memcmp
+; X64-NEXT:    shrl $31, %eax
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 64) nounwind
+  %cmp = icmp slt i32 %call, 0
+  ret i1 %cmp
+}
+
+define i1 @length64_gt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length64_gt:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $64
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    setg %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: length64_gt:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movl $64, %edx
+; X64-NEXT:    callq memcmp
+; X64-NEXT:    testl %eax, %eax
+; X64-NEXT:    setg %al
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 64) nounwind
+  %cmp = icmp sgt i32 %call, 0
+  ret i1 %cmp
+}
+
+define i1 @length64_eq_const(i8* %X) nounwind {
+; X86-LABEL: length64_eq_const:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $64
+; X86-NEXT:    pushl $.L.str
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    sete %al
+; X86-NEXT:    retl
+;
+; X64-SSE-LABEL: length64_eq_const:
+; X64-SSE:       # %bb.0:
+; X64-SSE-NEXT:    pushq %rax
+; X64-SSE-NEXT:    movl $.L.str, %esi
+; X64-SSE-NEXT:    movl $64, %edx
+; X64-SSE-NEXT:    callq memcmp
+; X64-SSE-NEXT:    testl %eax, %eax
+; X64-SSE-NEXT:    sete %al
+; X64-SSE-NEXT:    popq %rcx
+; X64-SSE-NEXT:    retq
+;
+; X64-AVX1-LABEL: length64_eq_const:
+; X64-AVX1:       # %bb.0:
+; X64-AVX1-NEXT:    pushq %rax
+; X64-AVX1-NEXT:    movl $.L.str, %esi
+; X64-AVX1-NEXT:    movl $64, %edx
+; X64-AVX1-NEXT:    callq memcmp
+; X64-AVX1-NEXT:    testl %eax, %eax
+; X64-AVX1-NEXT:    sete %al
+; X64-AVX1-NEXT:    popq %rcx
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX2-LABEL: length64_eq_const:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    vmovdqu (%rdi), %ymm0
+; X64-AVX2-NEXT:    vmovdqu 32(%rdi), %ymm1
+; X64-AVX2-NEXT:    vpxor {{.*}}(%rip), %ymm1, %ymm1
+; X64-AVX2-NEXT:    vpxor {{.*}}(%rip), %ymm0, %ymm0
+; X64-AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT:    vptest %ymm0, %ymm0
+; X64-AVX2-NEXT:    sete %al
+; X64-AVX2-NEXT:    vzeroupper
+; X64-AVX2-NEXT:    retq
+;
+; X64-AVX512F-LABEL: length64_eq_const:
+; X64-AVX512F:       # %bb.0:
+; X64-AVX512F-NEXT:    vmovdqu64 (%rdi), %zmm0
+; X64-AVX512F-NEXT:    vpcmpeqd {{.*}}(%rip), %zmm0, %k0
+; X64-AVX512F-NEXT:    kortestw %k0, %k0
+; X64-AVX512F-NEXT:    setb %al
+; X64-AVX512F-NEXT:    vzeroupper
+; X64-AVX512F-NEXT:    retq
+;
+; X64-AVX512BW-LABEL: length64_eq_const:
+; X64-AVX512BW:       # %bb.0:
+; X64-AVX512BW-NEXT:    vmovdqu64 (%rdi), %zmm0
+; X64-AVX512BW-NEXT:    vpcmpeqb {{.*}}(%rip), %zmm0, %k0
+; X64-AVX512BW-NEXT:    kortestq %k0, %k0
+; X64-AVX512BW-NEXT:    setb %al
+; X64-AVX512BW-NEXT:    vzeroupper
+; X64-AVX512BW-NEXT:    retq
+  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 64) nounwind
+  %c = icmp eq i32 %m, 0
+  ret i1 %c
+}
+
+define i32 @length96(i8* %X, i8* %Y) nounwind {
+; X86-LABEL: length96:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $96
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    retl
+;
+; X64-LABEL: length96:
+; X64:       # %bb.0:
+; X64-NEXT:    movl $96, %edx
+; X64-NEXT:    jmp memcmp # TAILCALL
+  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 96) nounwind
+  ret i32 %m
+}
+
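+; Note: 96 bytes on AVX512 becomes one 64-byte zmm compare plus a 32-byte
+; compare whose operands are (expensively) rebuilt from scalar loads; the
+; two results are fused through the masked compare ({%k1}).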
+define i1 @length96_eq(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length96_eq:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $96
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    setne %al
+; X86-NEXT:    retl
+;
+; X64-SSE-LABEL: length96_eq:
+; X64-SSE:       # %bb.0:
+; X64-SSE-NEXT:    pushq %rax
+; X64-SSE-NEXT:    movl $96, %edx
+; X64-SSE-NEXT:    callq memcmp
+; X64-SSE-NEXT:    testl %eax, %eax
+; X64-SSE-NEXT:    setne %al
+; X64-SSE-NEXT:    popq %rcx
+; X64-SSE-NEXT:    retq
+;
+; X64-AVX1-LABEL: length96_eq:
+; X64-AVX1:       # %bb.0:
+; X64-AVX1-NEXT:    pushq %rax
+; X64-AVX1-NEXT:    movl $96, %edx
+; X64-AVX1-NEXT:    callq memcmp
+; X64-AVX1-NEXT:    testl %eax, %eax
+; X64-AVX1-NEXT:    setne %al
+; X64-AVX1-NEXT:    popq %rcx
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX2-LABEL: length96_eq:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    pushq %rax
+; X64-AVX2-NEXT:    movl $96, %edx
+; X64-AVX2-NEXT:    callq memcmp
+; X64-AVX2-NEXT:    testl %eax, %eax
+; X64-AVX2-NEXT:    setne %al
+; X64-AVX2-NEXT:    popq %rcx
+; X64-AVX2-NEXT:    retq
+;
+; X64-AVX512F-LABEL: length96_eq:
+; X64-AVX512F:       # %bb.0:
+; X64-AVX512F-NEXT:    movq 80(%rdi), %rax
+; X64-AVX512F-NEXT:    vmovd %eax, %xmm0
+; X64-AVX512F-NEXT:    shrq $32, %rax
+; X64-AVX512F-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
+; X64-AVX512F-NEXT:    movq 88(%rdi), %rax
+; X64-AVX512F-NEXT:    vpinsrd $2, %eax, %xmm0, %xmm0
+; X64-AVX512F-NEXT:    shrq $32, %rax
+; X64-AVX512F-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
+; X64-AVX512F-NEXT:    movq 64(%rdi), %rax
+; X64-AVX512F-NEXT:    vmovd %eax, %xmm1
+; X64-AVX512F-NEXT:    shrq $32, %rax
+; X64-AVX512F-NEXT:    vpinsrd $1, %eax, %xmm1, %xmm1
+; X64-AVX512F-NEXT:    vmovdqu64 (%rdi), %zmm2
+; X64-AVX512F-NEXT:    movq 72(%rdi), %rax
+; X64-AVX512F-NEXT:    vpinsrd $2, %eax, %xmm1, %xmm1
+; X64-AVX512F-NEXT:    shrq $32, %rax
+; X64-AVX512F-NEXT:    vpinsrd $3, %eax, %xmm1, %xmm1
+; X64-AVX512F-NEXT:    movq 80(%rsi), %rax
+; X64-AVX512F-NEXT:    vmovd %eax, %xmm3
+; X64-AVX512F-NEXT:    shrq $32, %rax
+; X64-AVX512F-NEXT:    vpinsrd $1, %eax, %xmm3, %xmm3
+; X64-AVX512F-NEXT:    movq 88(%rsi), %rax
+; X64-AVX512F-NEXT:    vpinsrd $2, %eax, %xmm3, %xmm3
+; X64-AVX512F-NEXT:    shrq $32, %rax
+; X64-AVX512F-NEXT:    vpinsrd $3, %eax, %xmm3, %xmm3
+; X64-AVX512F-NEXT:    movq 64(%rsi), %rax
+; X64-AVX512F-NEXT:    vmovd %eax, %xmm4
+; X64-AVX512F-NEXT:    shrq $32, %rax
+; X64-AVX512F-NEXT:    vpinsrd $1, %eax, %xmm4, %xmm4
+; X64-AVX512F-NEXT:    movq 72(%rsi), %rax
+; X64-AVX512F-NEXT:    vpinsrd $2, %eax, %xmm4, %xmm4
+; X64-AVX512F-NEXT:    shrq $32, %rax
+; X64-AVX512F-NEXT:    vpinsrd $3, %eax, %xmm4, %xmm4
+; X64-AVX512F-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
+; X64-AVX512F-NEXT:    vinserti128 $1, %xmm3, %ymm4, %ymm1
+; X64-AVX512F-NEXT:    vpcmpeqd %zmm1, %zmm0, %k1
+; X64-AVX512F-NEXT:    vpcmpeqd (%rsi), %zmm2, %k0 {%k1}
+; X64-AVX512F-NEXT:    kortestw %k0, %k0
+; X64-AVX512F-NEXT:    setae %al
+; X64-AVX512F-NEXT:    vzeroupper
+; X64-AVX512F-NEXT:    retq
+;
+; X64-AVX512BW-LABEL: length96_eq:
+; X64-AVX512BW:       # %bb.0:
+; X64-AVX512BW-NEXT:    movq 80(%rdi), %rcx
+; X64-AVX512BW-NEXT:    movq %rcx, %rax
+; X64-AVX512BW-NEXT:    movl %ecx, %edx
+; X64-AVX512BW-NEXT:    shrl $8, %edx
+; X64-AVX512BW-NEXT:    vmovd %ecx, %xmm0
+; X64-AVX512BW-NEXT:    vpinsrb $1, %edx, %xmm0, %xmm0
+; X64-AVX512BW-NEXT:    movl %ecx, %edx
+; X64-AVX512BW-NEXT:    shrl $16, %edx
+; X64-AVX512BW-NEXT:    vpinsrb $2, %edx, %xmm0, %xmm0
+; X64-AVX512BW-NEXT:    movl %ecx, %edx
+; X64-AVX512BW-NEXT:    shrl $24, %edx
+; X64-AVX512BW-NEXT:    vpinsrb $3, %edx, %xmm0, %xmm0
+; X64-AVX512BW-NEXT:    movq %rcx, %rdx
+; X64-AVX512BW-NEXT:    shrq $32, %rdx
+; X64-AVX512BW-NEXT:    vpinsrb $4, %edx, %xmm0, %xmm0
+; X64-AVX512BW-NEXT:    movq %rcx, %rdx
+; X64-AVX512BW-NEXT:    shrq $40, %rcx
+; X64-AVX512BW-NEXT:    vpinsrb $5, %ecx, %xmm0, %xmm0
+; X64-AVX512BW-NEXT:    movq 88(%rdi), %rcx
+; X64-AVX512BW-NEXT:    shrq $48, %rdx
+; X64-AVX512BW-NEXT:    vpinsrb $6, %edx, %xmm0, %xmm0
+; X64-AVX512BW-NEXT:    movq %rcx, %rdx
+; X64-AVX512BW-NEXT:    shrq $56, %rax
+; X64-AVX512BW-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0
+; X64-AVX512BW-NEXT:    movl %ecx, %eax
+; X64-AVX512BW-NEXT:    shrl $8, %eax
+; X64-AVX512BW-NEXT:    vpinsrb $8, %ecx, %xmm0, %xmm0
+; X64-AVX512BW-NEXT:    vpinsrb $9, %eax, %xmm0, %xmm0
+; X64-AVX512BW-NEXT:    movl %ecx, %eax
+; X64-AVX512BW-NEXT:    shrl $16, %eax
+; X64-AVX512BW-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0
+; X64-AVX512BW-NEXT:    movl %ecx, %eax
+; X64-AVX512BW-NEXT:    shrl $24, %eax
+; X64-AVX512BW-NEXT:    vpinsrb $11, %eax, %xmm0, %xmm0
+; X64-AVX512BW-NEXT:    movq %rcx, %rax
+; X64-AVX512BW-NEXT:    shrq $32, %rax
+; X64-AVX512BW-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0
+; X64-AVX512BW-NEXT:    movq %rcx, %rax
+; X64-AVX512BW-NEXT:    shrq $40, %rcx
+; X64-AVX512BW-NEXT:    vpinsrb $13, %ecx, %xmm0, %xmm0
+; X64-AVX512BW-NEXT:    movq 64(%rdi), %rcx
+; X64-AVX512BW-NEXT:    shrq $48, %rax
+; X64-AVX512BW-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0
+; X64-AVX512BW-NEXT:    movq %rcx, %rax
+; X64-AVX512BW-NEXT:    shrq $56, %rdx
+; X64-AVX512BW-NEXT:    vpinsrb $15, %edx, %xmm0, %xmm0
+; X64-AVX512BW-NEXT:    movl %ecx, %edx
+; X64-AVX512BW-NEXT:    shrl $8, %edx
+; X64-AVX512BW-NEXT:    vmovd %ecx, %xmm1
+; X64-AVX512BW-NEXT:    vpinsrb $1, %edx, %xmm1, %xmm1
+; X64-AVX512BW-NEXT:    movl %ecx, %edx
+; X64-AVX512BW-NEXT:    shrl $16, %edx
+; X64-AVX512BW-NEXT:    vpinsrb $2, %edx, %xmm1, %xmm1
+; X64-AVX512BW-NEXT:    movl %ecx, %edx
+; X64-AVX512BW-NEXT:    shrl $24, %edx
+; X64-AVX512BW-NEXT:    vpinsrb $3, %edx, %xmm1, %xmm1
+; X64-AVX512BW-NEXT:    movq %rcx, %rdx
+; X64-AVX512BW-NEXT:    shrq $32, %rdx
+; X64-AVX512BW-NEXT:    vpinsrb $4, %edx, %xmm1, %xmm1
+; X64-AVX512BW-NEXT:    movq %rcx, %rdx
+; X64-AVX512BW-NEXT:    shrq $40, %rcx
+; X64-AVX512BW-NEXT:    vpinsrb $5, %ecx, %xmm1, %xmm2
+; X64-AVX512BW-NEXT:    vmovdqu64 (%rdi), %zmm1
+; X64-AVX512BW-NEXT:    movq 72(%rdi), %rcx
+; X64-AVX512BW-NEXT:    shrq $48, %rdx
+; X64-AVX512BW-NEXT:    vpinsrb $6, %edx, %xmm2, %xmm2
+; X64-AVX512BW-NEXT:    movq %rcx, %rdx
+; X64-AVX512BW-NEXT:    shrq $56, %rdx
+; X64-AVX512BW-NEXT:    shrq $56, %rax
+; X64-AVX512BW-NEXT:    vpinsrb $7, %eax, %xmm2, %xmm2
+; X64-AVX512BW-NEXT:    movl %ecx, %eax
+; X64-AVX512BW-NEXT:    shrl $8, %eax
+; X64-AVX512BW-NEXT:    vpinsrb $8, %ecx, %xmm2, %xmm2
+; X64-AVX512BW-NEXT:    vpinsrb $9, %eax, %xmm2, %xmm2
+; X64-AVX512BW-NEXT:    movl %ecx, %eax
+; X64-AVX512BW-NEXT:    shrl $16, %eax
+; X64-AVX512BW-NEXT:    vpinsrb $10, %eax, %xmm2, %xmm2
+; X64-AVX512BW-NEXT:    movl %ecx, %eax
+; X64-AVX512BW-NEXT:    shrl $24, %eax
+; X64-AVX512BW-NEXT:    vpinsrb $11, %eax, %xmm2, %xmm2
+; X64-AVX512BW-NEXT:    movq %rcx, %rax
+; X64-AVX512BW-NEXT:    shrq $32, %rax
+; X64-AVX512BW-NEXT:    vpinsrb $12, %eax, %xmm2, %xmm2
+; X64-AVX512BW-NEXT:    movq %rcx, %rax
+; X64-AVX512BW-NEXT:    shrq $48, %rax
+; X64-AVX512BW-NEXT:    shrq $40, %rcx
+; X64-AVX512BW-NEXT:    vpinsrb $13, %ecx, %xmm2, %xmm2
+; X64-AVX512BW-NEXT:    movq 80(%rsi), %rcx
+; X64-AVX512BW-NEXT:    vpinsrb $14, %eax, %xmm2, %xmm2
+; X64-AVX512BW-NEXT:    movq %rcx, %rax
+; X64-AVX512BW-NEXT:    vpinsrb $15, %edx, %xmm2, %xmm2
+; X64-AVX512BW-NEXT:    movl %ecx, %edx
+; X64-AVX512BW-NEXT:    shrl $8, %edx
+; X64-AVX512BW-NEXT:    vmovd %ecx, %xmm3
+; X64-AVX512BW-NEXT:    vpinsrb $1, %edx, %xmm3, %xmm3
+; X64-AVX512BW-NEXT:    movl %ecx, %edx
+; X64-AVX512BW-NEXT:    shrl $16, %edx
+; X64-AVX512BW-NEXT:    vpinsrb $2, %edx, %xmm3, %xmm3
+; X64-AVX512BW-NEXT:    movl %ecx, %edx
+; X64-AVX512BW-NEXT:    shrl $24, %edx
+; X64-AVX512BW-NEXT:    vpinsrb $3, %edx, %xmm3, %xmm3
+; X64-AVX512BW-NEXT:    movq %rcx, %rdx
+; X64-AVX512BW-NEXT:    shrq $32, %rdx
+; X64-AVX512BW-NEXT:    vpinsrb $4, %edx, %xmm3, %xmm3
+; X64-AVX512BW-NEXT:    movq %rcx, %rdx
+; X64-AVX512BW-NEXT:    shrq $40, %rcx
+; X64-AVX512BW-NEXT:    vpinsrb $5, %ecx, %xmm3, %xmm3
+; X64-AVX512BW-NEXT:    movq 88(%rsi), %rcx
+; X64-AVX512BW-NEXT:    shrq $48, %rdx
+; X64-AVX512BW-NEXT:    vpinsrb $6, %edx, %xmm3, %xmm3
+; X64-AVX512BW-NEXT:    movq %rcx, %rdx
+; X64-AVX512BW-NEXT:    shrq $56, %rax
+; X64-AVX512BW-NEXT:    vpinsrb $7, %eax, %xmm3, %xmm3
+; X64-AVX512BW-NEXT:    movl %ecx, %eax
+; X64-AVX512BW-NEXT:    shrl $8, %eax
+; X64-AVX512BW-NEXT:    vpinsrb $8, %ecx, %xmm3, %xmm3
+; X64-AVX512BW-NEXT:    vpinsrb $9, %eax, %xmm3, %xmm3
+; X64-AVX512BW-NEXT:    movl %ecx, %eax
+; X64-AVX512BW-NEXT:    shrl $16, %eax
+; X64-AVX512BW-NEXT:    vpinsrb $10, %eax, %xmm3, %xmm3
+; X64-AVX512BW-NEXT:    movl %ecx, %eax
+; X64-AVX512BW-NEXT:    shrl $24, %eax
+; X64-AVX512BW-NEXT:    vpinsrb $11, %eax, %xmm3, %xmm3
+; X64-AVX512BW-NEXT:    movq %rcx, %rax
+; X64-AVX512BW-NEXT:    shrq $32, %rax
+; X64-AVX512BW-NEXT:    vpinsrb $12, %eax, %xmm3, %xmm3
+; X64-AVX512BW-NEXT:    movq %rcx, %rax
+; X64-AVX512BW-NEXT:    shrq $40, %rcx
+; X64-AVX512BW-NEXT:    vpinsrb $13, %ecx, %xmm3, %xmm3
+; X64-AVX512BW-NEXT:    movq 64(%rsi), %rcx
+; X64-AVX512BW-NEXT:    shrq $48, %rax
+; X64-AVX512BW-NEXT:    vpinsrb $14, %eax, %xmm3, %xmm3
+; X64-AVX512BW-NEXT:    movq %rcx, %rax
+; X64-AVX512BW-NEXT:    shrq $56, %rdx
+; X64-AVX512BW-NEXT:    vpinsrb $15, %edx, %xmm3, %xmm3
+; X64-AVX512BW-NEXT:    movl %ecx, %edx
+; X64-AVX512BW-NEXT:    shrl $8, %edx
+; X64-AVX512BW-NEXT:    vmovd %ecx, %xmm4
+; X64-AVX512BW-NEXT:    vpinsrb $1, %edx, %xmm4, %xmm4
+; X64-AVX512BW-NEXT:    movl %ecx, %edx
+; X64-AVX512BW-NEXT:    shrl $16, %edx
+; X64-AVX512BW-NEXT:    vpinsrb $2, %edx, %xmm4, %xmm4
+; X64-AVX512BW-NEXT:    movl %ecx, %edx
+; X64-AVX512BW-NEXT:    shrl $24, %edx
+; X64-AVX512BW-NEXT:    vpinsrb $3, %edx, %xmm4, %xmm4
+; X64-AVX512BW-NEXT:    movq %rcx, %rdx
+; X64-AVX512BW-NEXT:    shrq $32, %rdx
+; X64-AVX512BW-NEXT:    vpinsrb $4, %edx, %xmm4, %xmm4
+; X64-AVX512BW-NEXT:    movq %rcx, %rdx
+; X64-AVX512BW-NEXT:    shrq $40, %rcx
+; X64-AVX512BW-NEXT:    vpinsrb $5, %ecx, %xmm4, %xmm4
+; X64-AVX512BW-NEXT:    movq 72(%rsi), %rcx
+; X64-AVX512BW-NEXT:    shrq $48, %rdx
+; X64-AVX512BW-NEXT:    vpinsrb $6, %edx, %xmm4, %xmm4
+; X64-AVX512BW-NEXT:    movq %rcx, %rdx
+; X64-AVX512BW-NEXT:    shrq $56, %rax
+; X64-AVX512BW-NEXT:    vpinsrb $7, %eax, %xmm4, %xmm4
+; X64-AVX512BW-NEXT:    movl %ecx, %eax
+; X64-AVX512BW-NEXT:    shrl $8, %eax
+; X64-AVX512BW-NEXT:    vpinsrb $8, %ecx, %xmm4, %xmm4
+; X64-AVX512BW-NEXT:    vpinsrb $9, %eax, %xmm4, %xmm4
+; X64-AVX512BW-NEXT:    movl %ecx, %eax
+; X64-AVX512BW-NEXT:    shrl $16, %eax
+; X64-AVX512BW-NEXT:    vpinsrb $10, %eax, %xmm4, %xmm4
+; X64-AVX512BW-NEXT:    movl %ecx, %eax
+; X64-AVX512BW-NEXT:    shrl $24, %eax
+; X64-AVX512BW-NEXT:    vpinsrb $11, %eax, %xmm4, %xmm4
+; X64-AVX512BW-NEXT:    movq %rcx, %rax
+; X64-AVX512BW-NEXT:    shrq $32, %rax
+; X64-AVX512BW-NEXT:    vpinsrb $12, %eax, %xmm4, %xmm4
+; X64-AVX512BW-NEXT:    movq %rcx, %rax
+; X64-AVX512BW-NEXT:    shrq $40, %rcx
+; X64-AVX512BW-NEXT:    vpinsrb $13, %ecx, %xmm4, %xmm4
+; X64-AVX512BW-NEXT:    shrq $48, %rax
+; X64-AVX512BW-NEXT:    vpinsrb $14, %eax, %xmm4, %xmm4
+; X64-AVX512BW-NEXT:    shrq $56, %rdx
+; X64-AVX512BW-NEXT:    vpinsrb $15, %edx, %xmm4, %xmm4
+; X64-AVX512BW-NEXT:    vinserti128 $1, %xmm0, %ymm2, %ymm0
+; X64-AVX512BW-NEXT:    vinserti128 $1, %xmm3, %ymm4, %ymm2
+; X64-AVX512BW-NEXT:    vpcmpeqb %zmm2, %zmm0, %k1
+; X64-AVX512BW-NEXT:    vpcmpeqb (%rsi), %zmm1, %k0 {%k1}
+; X64-AVX512BW-NEXT:    kortestq %k0, %k0
+; X64-AVX512BW-NEXT:    setae %al
+; X64-AVX512BW-NEXT:    vzeroupper
+; X64-AVX512BW-NEXT:    retq
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 96) nounwind
+  %cmp = icmp ne i32 %call, 0
+  ret i1 %cmp
+}
+
+define i1 @length96_lt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length96_lt:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $96
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    shrl $31, %eax
+; X86-NEXT:    # kill: def $al killed $al killed $eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: length96_lt:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movl $96, %edx
+; X64-NEXT:    callq memcmp
+; X64-NEXT:    shrl $31, %eax
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 96) nounwind
+  %cmp = icmp slt i32 %call, 0
+  ret i1 %cmp
+}
+
+define i1 @length96_gt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length96_gt:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $96
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    setg %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: length96_gt:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movl $96, %edx
+; X64-NEXT:    callq memcmp
+; X64-NEXT:    testl %eax, %eax
+; X64-NEXT:    setg %al
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 96) nounwind
+  %cmp = icmp sgt i32 %call, 0
+  ret i1 %cmp
+}
+
+define i1 @length96_eq_const(i8* %X) nounwind {
+; X86-LABEL: length96_eq_const:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $96
+; X86-NEXT:    pushl $.L.str
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    sete %al
+; X86-NEXT:    retl
+;
+; X64-SSE-LABEL: length96_eq_const:
+; X64-SSE:       # %bb.0:
+; X64-SSE-NEXT:    pushq %rax
+; X64-SSE-NEXT:    movl $.L.str, %esi
+; X64-SSE-NEXT:    movl $96, %edx
+; X64-SSE-NEXT:    callq memcmp
+; X64-SSE-NEXT:    testl %eax, %eax
+; X64-SSE-NEXT:    sete %al
+; X64-SSE-NEXT:    popq %rcx
+; X64-SSE-NEXT:    retq
+;
+; X64-AVX1-LABEL: length96_eq_const:
+; X64-AVX1:       # %bb.0:
+; X64-AVX1-NEXT:    pushq %rax
+; X64-AVX1-NEXT:    movl $.L.str, %esi
+; X64-AVX1-NEXT:    movl $96, %edx
+; X64-AVX1-NEXT:    callq memcmp
+; X64-AVX1-NEXT:    testl %eax, %eax
+; X64-AVX1-NEXT:    sete %al
+; X64-AVX1-NEXT:    popq %rcx
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX2-LABEL: length96_eq_const:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    pushq %rax
+; X64-AVX2-NEXT:    movl $.L.str, %esi
+; X64-AVX2-NEXT:    movl $96, %edx
+; X64-AVX2-NEXT:    callq memcmp
+; X64-AVX2-NEXT:    testl %eax, %eax
+; X64-AVX2-NEXT:    sete %al
+; X64-AVX2-NEXT:    popq %rcx
+; X64-AVX2-NEXT:    retq
+;
+; X64-AVX512F-LABEL: length96_eq_const:
+; X64-AVX512F:       # %bb.0:
+; X64-AVX512F-NEXT:    vmovdqu64 (%rdi), %zmm0
+; X64-AVX512F-NEXT:    movq 72(%rdi), %rax
+; X64-AVX512F-NEXT:    movq 64(%rdi), %rcx
+; X64-AVX512F-NEXT:    vmovd %ecx, %xmm1
+; X64-AVX512F-NEXT:    shrq $32, %rcx
+; X64-AVX512F-NEXT:    vpinsrd $1, %ecx, %xmm1, %xmm1
+; X64-AVX512F-NEXT:    vpinsrd $2, %eax, %xmm1, %xmm1
+; X64-AVX512F-NEXT:    shrq $32, %rax
+; X64-AVX512F-NEXT:    movq 88(%rdi), %rcx
+; X64-AVX512F-NEXT:    movq 80(%rdi), %rdx
+; X64-AVX512F-NEXT:    vmovd %edx, %xmm2
+; X64-AVX512F-NEXT:    shrq $32, %rdx
+; X64-AVX512F-NEXT:    vpinsrd $1, %edx, %xmm2, %xmm2
+; X64-AVX512F-NEXT:    vpinsrd $2, %ecx, %xmm2, %xmm2
+; X64-AVX512F-NEXT:    shrq $32, %rcx
+; X64-AVX512F-NEXT:    vpinsrd $3, %ecx, %xmm2, %xmm2
+; X64-AVX512F-NEXT:    vpinsrd $3, %eax, %xmm1, %xmm1
+; X64-AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
+; X64-AVX512F-NEXT:    vpcmpeqd {{.*}}(%rip), %zmm0, %k1
+; X64-AVX512F-NEXT:    vpcmpeqd {{.*}}(%rip), %zmm1, %k0 {%k1}
+; X64-AVX512F-NEXT:    kortestw %k0, %k0
+; X64-AVX512F-NEXT:    setb %al
+; X64-AVX512F-NEXT:    vzeroupper
+; X64-AVX512F-NEXT:    retq
+;
+; X64-AVX512BW-LABEL: length96_eq_const:
+; X64-AVX512BW:       # %bb.0:
+; X64-AVX512BW-NEXT:    movq 80(%rdi), %rax
+; X64-AVX512BW-NEXT:    movq %rax, %rcx
+; X64-AVX512BW-NEXT:    vmovd %eax, %xmm0
+; X64-AVX512BW-NEXT:    movl %eax, %edx
+; X64-AVX512BW-NEXT:    shrl $8, %edx
+; X64-AVX512BW-NEXT:    vpinsrb $1, %edx, %xmm0, %xmm0
+; X64-AVX512BW-NEXT:    movl %eax, %edx
+; X64-AVX512BW-NEXT:    shrl $16, %edx
+; X64-AVX512BW-NEXT:    vpinsrb $2, %edx, %xmm0, %xmm0
+; X64-AVX512BW-NEXT:    movl %eax, %edx
+; X64-AVX512BW-NEXT:    shrl $24, %edx
+; X64-AVX512BW-NEXT:    vpinsrb $3, %edx, %xmm0, %xmm0
+; X64-AVX512BW-NEXT:    movq %rax, %rdx
+; X64-AVX512BW-NEXT:    shrq $32, %rdx
+; X64-AVX512BW-NEXT:    vpinsrb $4, %edx, %xmm0, %xmm0
+; X64-AVX512BW-NEXT:    movq %rax, %rdx
+; X64-AVX512BW-NEXT:    shrq $40, %rax
+; X64-AVX512BW-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0
+; X64-AVX512BW-NEXT:    movq 88(%rdi), %rax
+; X64-AVX512BW-NEXT:    shrq $48, %rdx
+; X64-AVX512BW-NEXT:    vpinsrb $6, %edx, %xmm0, %xmm0
+; X64-AVX512BW-NEXT:    movq %rax, %rdx
+; X64-AVX512BW-NEXT:    shrq $56, %rcx
+; X64-AVX512BW-NEXT:    vpinsrb $7, %ecx, %xmm0, %xmm0
+; X64-AVX512BW-NEXT:    movl %eax, %ecx
+; X64-AVX512BW-NEXT:    shrl $8, %ecx
+; X64-AVX512BW-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0
+; X64-AVX512BW-NEXT:    vpinsrb $9, %ecx, %xmm0, %xmm0
+; X64-AVX512BW-NEXT:    movl %eax, %ecx
+; X64-AVX512BW-NEXT:    shrl $16, %ecx
+; X64-AVX512BW-NEXT:    vpinsrb $10, %ecx, %xmm0, %xmm0
+; X64-AVX512BW-NEXT:    movl %eax, %ecx
+; X64-AVX512BW-NEXT:    shrl $24, %ecx
+; X64-AVX512BW-NEXT:    vpinsrb $11, %ecx, %xmm0, %xmm0
+; X64-AVX512BW-NEXT:    movq %rax, %rcx
+; X64-AVX512BW-NEXT:    shrq $32, %rcx
+; X64-AVX512BW-NEXT:    vpinsrb $12, %ecx, %xmm0, %xmm0
+; X64-AVX512BW-NEXT:    movq %rax, %rcx
+; X64-AVX512BW-NEXT:    shrq $40, %rax
+; X64-AVX512BW-NEXT:    vpinsrb $13, %eax, %xmm0, %xmm0
+; X64-AVX512BW-NEXT:    movq 64(%rdi), %rax
+; X64-AVX512BW-NEXT:    shrq $48, %rcx
+; X64-AVX512BW-NEXT:    vpinsrb $14, %ecx, %xmm0, %xmm0
+; X64-AVX512BW-NEXT:    movq %rax, %rcx
+; X64-AVX512BW-NEXT:    shrq $56, %rdx
+; X64-AVX512BW-NEXT:    vpinsrb $15, %edx, %xmm0, %xmm0
+; X64-AVX512BW-NEXT:    movl %eax, %edx
+; X64-AVX512BW-NEXT:    shrl $8, %edx
+; X64-AVX512BW-NEXT:    vmovd %eax, %xmm1
+; X64-AVX512BW-NEXT:    vpinsrb $1, %edx, %xmm1, %xmm1
+; X64-AVX512BW-NEXT:    movl %eax, %edx
+; X64-AVX512BW-NEXT:    shrl $16, %edx
+; X64-AVX512BW-NEXT:    vpinsrb $2, %edx, %xmm1, %xmm1
+; X64-AVX512BW-NEXT:    movl %eax, %edx
+; X64-AVX512BW-NEXT:    shrl $24, %edx
+; X64-AVX512BW-NEXT:    vpinsrb $3, %edx, %xmm1, %xmm1
+; X64-AVX512BW-NEXT:    movq %rax, %rdx
+; X64-AVX512BW-NEXT:    shrq $32, %rdx
+; X64-AVX512BW-NEXT:    vpinsrb $4, %edx, %xmm1, %xmm1
+; X64-AVX512BW-NEXT:    movq %rax, %rdx
+; X64-AVX512BW-NEXT:    shrq $40, %rax
+; X64-AVX512BW-NEXT:    vpinsrb $5, %eax, %xmm1, %xmm1
+; X64-AVX512BW-NEXT:    vmovdqu64 (%rdi), %zmm2
+; X64-AVX512BW-NEXT:    movq 72(%rdi), %rax
+; X64-AVX512BW-NEXT:    shrq $48, %rdx
+; X64-AVX512BW-NEXT:    vpinsrb $6, %edx, %xmm1, %xmm1
+; X64-AVX512BW-NEXT:    movq %rax, %rdx
+; X64-AVX512BW-NEXT:    shrq $56, %rcx
+; X64-AVX512BW-NEXT:    vpinsrb $7, %ecx, %xmm1, %xmm1
+; X64-AVX512BW-NEXT:    movl %eax, %ecx
+; X64-AVX512BW-NEXT:    shrl $8, %ecx
+; X64-AVX512BW-NEXT:    vpinsrb $8, %eax, %xmm1, %xmm1
+; X64-AVX512BW-NEXT:    vpinsrb $9, %ecx, %xmm1, %xmm1
+; X64-AVX512BW-NEXT:    movl %eax, %ecx
+; X64-AVX512BW-NEXT:    shrl $16, %ecx
+; X64-AVX512BW-NEXT:    vpinsrb $10, %ecx, %xmm1, %xmm1
+; X64-AVX512BW-NEXT:    movl %eax, %ecx
+; X64-AVX512BW-NEXT:    shrl $24, %ecx
+; X64-AVX512BW-NEXT:    vpinsrb $11, %ecx, %xmm1, %xmm1
+; X64-AVX512BW-NEXT:    movq %rax, %rcx
+; X64-AVX512BW-NEXT:    shrq $32, %rcx
+; X64-AVX512BW-NEXT:    vpinsrb $12, %ecx, %xmm1, %xmm1
+; X64-AVX512BW-NEXT:    movq %rax, %rcx
+; X64-AVX512BW-NEXT:    shrq $40, %rax
+; X64-AVX512BW-NEXT:    vpinsrb $13, %eax, %xmm1, %xmm1
+; X64-AVX512BW-NEXT:    shrq $48, %rcx
+; X64-AVX512BW-NEXT:    vpinsrb $14, %ecx, %xmm1, %xmm1
+; X64-AVX512BW-NEXT:    shrq $56, %rdx
+; X64-AVX512BW-NEXT:    vpinsrb $15, %edx, %xmm1, %xmm1
+; X64-AVX512BW-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
+; X64-AVX512BW-NEXT:    vpcmpeqb {{.*}}(%rip), %zmm2, %k1
+; X64-AVX512BW-NEXT:    vpcmpeqb {{.*}}(%rip), %zmm0, %k0 {%k1}
+; X64-AVX512BW-NEXT:    kortestq %k0, %k0
+; X64-AVX512BW-NEXT:    setb %al
+; X64-AVX512BW-NEXT:    vzeroupper
+; X64-AVX512BW-NEXT:    retq
+  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 96) nounwind
+  %c = icmp eq i32 %m, 0
+  ret i1 %c
+}
+
+define i32 @length127(i8* %X, i8* %Y) nounwind {
+; X86-LABEL: length127:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $127
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    retl
+;
+; X64-LABEL: length127:
+; X64:       # %bb.0:
+; X64-NEXT:    movl $127, %edx
+; X64-NEXT:    jmp memcmp # TAILCALL
+  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 127) nounwind
+  ret i32 %m
+}
+
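+; Note: as with 63 bytes, 127 bytes uses overlapping 64-byte loads at
+; offsets 0 and 63, with the two zmm compares chained through the {%k1} mask.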
+define i1 @length127_eq(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length127_eq:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $127
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    setne %al
+; X86-NEXT:    retl
+;
+; X64-SSE-LABEL: length127_eq:
+; X64-SSE:       # %bb.0:
+; X64-SSE-NEXT:    pushq %rax
+; X64-SSE-NEXT:    movl $127, %edx
+; X64-SSE-NEXT:    callq memcmp
+; X64-SSE-NEXT:    testl %eax, %eax
+; X64-SSE-NEXT:    setne %al
+; X64-SSE-NEXT:    popq %rcx
+; X64-SSE-NEXT:    retq
+;
+; X64-AVX1-LABEL: length127_eq:
+; X64-AVX1:       # %bb.0:
+; X64-AVX1-NEXT:    pushq %rax
+; X64-AVX1-NEXT:    movl $127, %edx
+; X64-AVX1-NEXT:    callq memcmp
+; X64-AVX1-NEXT:    testl %eax, %eax
+; X64-AVX1-NEXT:    setne %al
+; X64-AVX1-NEXT:    popq %rcx
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX2-LABEL: length127_eq:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    pushq %rax
+; X64-AVX2-NEXT:    movl $127, %edx
+; X64-AVX2-NEXT:    callq memcmp
+; X64-AVX2-NEXT:    testl %eax, %eax
+; X64-AVX2-NEXT:    setne %al
+; X64-AVX2-NEXT:    popq %rcx
+; X64-AVX2-NEXT:    retq
+;
+; X64-AVX512F-LABEL: length127_eq:
+; X64-AVX512F:       # %bb.0:
+; X64-AVX512F-NEXT:    vmovdqu64 (%rdi), %zmm0
+; X64-AVX512F-NEXT:    vmovdqu64 63(%rdi), %zmm1
+; X64-AVX512F-NEXT:    vpcmpeqd (%rsi), %zmm0, %k1
+; X64-AVX512F-NEXT:    vpcmpeqd 63(%rsi), %zmm1, %k0 {%k1}
+; X64-AVX512F-NEXT:    kortestw %k0, %k0
+; X64-AVX512F-NEXT:    setae %al
+; X64-AVX512F-NEXT:    vzeroupper
+; X64-AVX512F-NEXT:    retq
+;
+; X64-AVX512BW-LABEL: length127_eq:
+; X64-AVX512BW:       # %bb.0:
+; X64-AVX512BW-NEXT:    vmovdqu64 (%rdi), %zmm0
+; X64-AVX512BW-NEXT:    vmovdqu64 63(%rdi), %zmm1
+; X64-AVX512BW-NEXT:    vpcmpeqb (%rsi), %zmm0, %k1
+; X64-AVX512BW-NEXT:    vpcmpeqb 63(%rsi), %zmm1, %k0 {%k1}
+; X64-AVX512BW-NEXT:    kortestq %k0, %k0
+; X64-AVX512BW-NEXT:    setae %al
+; X64-AVX512BW-NEXT:    vzeroupper
+; X64-AVX512BW-NEXT:    retq
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 127) nounwind
+  %cmp = icmp ne i32 %call, 0
+  ret i1 %cmp
+}
+
+define i1 @length127_lt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length127_lt:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $127
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    shrl $31, %eax
+; X86-NEXT:    # kill: def $al killed $al killed $eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: length127_lt:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movl $127, %edx
+; X64-NEXT:    callq memcmp
+; X64-NEXT:    shrl $31, %eax
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 127) nounwind
+  %cmp = icmp slt i32 %call, 0
+  ret i1 %cmp
+}
+
+define i1 @length127_gt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length127_gt:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $127
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    setg %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: length127_gt:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movl $127, %edx
+; X64-NEXT:    callq memcmp
+; X64-NEXT:    testl %eax, %eax
+; X64-NEXT:    setg %al
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 127) nounwind
+  %cmp = icmp sgt i32 %call, 0
+  ret i1 %cmp
+}
+
+define i1 @length127_eq_const(i8* %X) nounwind {
+; X86-LABEL: length127_eq_const:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $127
+; X86-NEXT:    pushl $.L.str
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    sete %al
+; X86-NEXT:    retl
+;
+; X64-SSE-LABEL: length127_eq_const:
+; X64-SSE:       # %bb.0:
+; X64-SSE-NEXT:    pushq %rax
+; X64-SSE-NEXT:    movl $.L.str, %esi
+; X64-SSE-NEXT:    movl $127, %edx
+; X64-SSE-NEXT:    callq memcmp
+; X64-SSE-NEXT:    testl %eax, %eax
+; X64-SSE-NEXT:    sete %al
+; X64-SSE-NEXT:    popq %rcx
+; X64-SSE-NEXT:    retq
+;
+; X64-AVX1-LABEL: length127_eq_const:
+; X64-AVX1:       # %bb.0:
+; X64-AVX1-NEXT:    pushq %rax
+; X64-AVX1-NEXT:    movl $.L.str, %esi
+; X64-AVX1-NEXT:    movl $127, %edx
+; X64-AVX1-NEXT:    callq memcmp
+; X64-AVX1-NEXT:    testl %eax, %eax
+; X64-AVX1-NEXT:    sete %al
+; X64-AVX1-NEXT:    popq %rcx
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX2-LABEL: length127_eq_const:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    pushq %rax
+; X64-AVX2-NEXT:    movl $.L.str, %esi
+; X64-AVX2-NEXT:    movl $127, %edx
+; X64-AVX2-NEXT:    callq memcmp
+; X64-AVX2-NEXT:    testl %eax, %eax
+; X64-AVX2-NEXT:    sete %al
+; X64-AVX2-NEXT:    popq %rcx
+; X64-AVX2-NEXT:    retq
+;
+; X64-AVX512F-LABEL: length127_eq_const:
+; X64-AVX512F:       # %bb.0:
+; X64-AVX512F-NEXT:    vmovdqu64 (%rdi), %zmm0
+; X64-AVX512F-NEXT:    vmovdqu64 63(%rdi), %zmm1
+; X64-AVX512F-NEXT:    vpcmpeqd {{.*}}(%rip), %zmm0, %k1
+; X64-AVX512F-NEXT:    vpcmpeqd .L.str+{{.*}}(%rip), %zmm1, %k0 {%k1}
+; X64-AVX512F-NEXT:    kortestw %k0, %k0
+; X64-AVX512F-NEXT:    setb %al
+; X64-AVX512F-NEXT:    vzeroupper
+; X64-AVX512F-NEXT:    retq
+;
+; X64-AVX512BW-LABEL: length127_eq_const:
+; X64-AVX512BW:       # %bb.0:
+; X64-AVX512BW-NEXT:    vmovdqu64 (%rdi), %zmm0
+; X64-AVX512BW-NEXT:    vmovdqu64 63(%rdi), %zmm1
+; X64-AVX512BW-NEXT:    vpcmpeqb {{.*}}(%rip), %zmm0, %k1
+; X64-AVX512BW-NEXT:    vpcmpeqb .L.str+{{.*}}(%rip), %zmm1, %k0 {%k1}
+; X64-AVX512BW-NEXT:    kortestq %k0, %k0
+; X64-AVX512BW-NEXT:    setb %al
+; X64-AVX512BW-NEXT:    vzeroupper
+; X64-AVX512BW-NEXT:    retq
+  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 127) nounwind
+  %c = icmp eq i32 %m, 0
+  ret i1 %c
+}
+
+define i32 @length128(i8* %X, i8* %Y) nounwind {
+; X86-LABEL: length128:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $128
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    retl
+;
+; X64-LABEL: length128:
+; X64:       # %bb.0:
+; X64-NEXT:    movl $128, %edx
+; X64-NEXT:    jmp memcmp # TAILCALL
+  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 128) nounwind
+  ret i32 %m
+}
+
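+; Note: 128 bytes is two exact zmm compares fused via {%k1}; only the AVX512
+; targets stay inline, as AVX2 already falls back to the libcall at this
+; size.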
+define i1 @length128_eq(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length128_eq:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $128
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    setne %al
+; X86-NEXT:    retl
+;
+; X64-SSE-LABEL: length128_eq:
+; X64-SSE:       # %bb.0:
+; X64-SSE-NEXT:    pushq %rax
+; X64-SSE-NEXT:    movl $128, %edx
+; X64-SSE-NEXT:    callq memcmp
+; X64-SSE-NEXT:    testl %eax, %eax
+; X64-SSE-NEXT:    setne %al
+; X64-SSE-NEXT:    popq %rcx
+; X64-SSE-NEXT:    retq
+;
+; X64-AVX1-LABEL: length128_eq:
+; X64-AVX1:       # %bb.0:
+; X64-AVX1-NEXT:    pushq %rax
+; X64-AVX1-NEXT:    movl $128, %edx
+; X64-AVX1-NEXT:    callq memcmp
+; X64-AVX1-NEXT:    testl %eax, %eax
+; X64-AVX1-NEXT:    setne %al
+; X64-AVX1-NEXT:    popq %rcx
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX2-LABEL: length128_eq:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    pushq %rax
+; X64-AVX2-NEXT:    movl $128, %edx
+; X64-AVX2-NEXT:    callq memcmp
+; X64-AVX2-NEXT:    testl %eax, %eax
+; X64-AVX2-NEXT:    setne %al
+; X64-AVX2-NEXT:    popq %rcx
+; X64-AVX2-NEXT:    retq
+;
+; X64-AVX512F-LABEL: length128_eq:
+; X64-AVX512F:       # %bb.0:
+; X64-AVX512F-NEXT:    vmovdqu64 (%rdi), %zmm0
+; X64-AVX512F-NEXT:    vmovdqu64 64(%rdi), %zmm1
+; X64-AVX512F-NEXT:    vpcmpeqd (%rsi), %zmm0, %k1
+; X64-AVX512F-NEXT:    vpcmpeqd 64(%rsi), %zmm1, %k0 {%k1}
+; X64-AVX512F-NEXT:    kortestw %k0, %k0
+; X64-AVX512F-NEXT:    setae %al
+; X64-AVX512F-NEXT:    vzeroupper
+; X64-AVX512F-NEXT:    retq
+;
+; X64-AVX512BW-LABEL: length128_eq:
+; X64-AVX512BW:       # %bb.0:
+; X64-AVX512BW-NEXT:    vmovdqu64 (%rdi), %zmm0
+; X64-AVX512BW-NEXT:    vmovdqu64 64(%rdi), %zmm1
+; X64-AVX512BW-NEXT:    vpcmpeqb (%rsi), %zmm0, %k1
+; X64-AVX512BW-NEXT:    vpcmpeqb 64(%rsi), %zmm1, %k0 {%k1}
+; X64-AVX512BW-NEXT:    kortestq %k0, %k0
+; X64-AVX512BW-NEXT:    setae %al
+; X64-AVX512BW-NEXT:    vzeroupper
+; X64-AVX512BW-NEXT:    retq
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 128) nounwind
+  %cmp = icmp ne i32 %call, 0
+  ret i1 %cmp
+}
+
+define i1 @length128_lt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length128_lt:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $128
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    shrl $31, %eax
+; X86-NEXT:    # kill: def $al killed $al killed $eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: length128_lt:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movl $128, %edx
+; X64-NEXT:    callq memcmp
+; X64-NEXT:    shrl $31, %eax
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 128) nounwind
+  %cmp = icmp slt i32 %call, 0
+  ret i1 %cmp
+}
+
+define i1 @length128_gt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length128_gt:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $128
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    setg %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: length128_gt:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movl $128, %edx
+; X64-NEXT:    callq memcmp
+; X64-NEXT:    testl %eax, %eax
+; X64-NEXT:    setg %al
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 128) nounwind
+  %cmp = icmp sgt i32 %call, 0
+  ret i1 %cmp
+}
+
+define i1 @length128_eq_const(i8* %X) nounwind {
+; X86-LABEL: length128_eq_const:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $128
+; X86-NEXT:    pushl $.L.str
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    sete %al
+; X86-NEXT:    retl
+;
+; X64-SSE-LABEL: length128_eq_const:
+; X64-SSE:       # %bb.0:
+; X64-SSE-NEXT:    pushq %rax
+; X64-SSE-NEXT:    movl $.L.str, %esi
+; X64-SSE-NEXT:    movl $128, %edx
+; X64-SSE-NEXT:    callq memcmp
+; X64-SSE-NEXT:    testl %eax, %eax
+; X64-SSE-NEXT:    sete %al
+; X64-SSE-NEXT:    popq %rcx
+; X64-SSE-NEXT:    retq
+;
+; X64-AVX1-LABEL: length128_eq_const:
+; X64-AVX1:       # %bb.0:
+; X64-AVX1-NEXT:    pushq %rax
+; X64-AVX1-NEXT:    movl $.L.str, %esi
+; X64-AVX1-NEXT:    movl $128, %edx
+; X64-AVX1-NEXT:    callq memcmp
+; X64-AVX1-NEXT:    testl %eax, %eax
+; X64-AVX1-NEXT:    sete %al
+; X64-AVX1-NEXT:    popq %rcx
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX2-LABEL: length128_eq_const:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    pushq %rax
+; X64-AVX2-NEXT:    movl $.L.str, %esi
+; X64-AVX2-NEXT:    movl $128, %edx
+; X64-AVX2-NEXT:    callq memcmp
+; X64-AVX2-NEXT:    testl %eax, %eax
+; X64-AVX2-NEXT:    sete %al
+; X64-AVX2-NEXT:    popq %rcx
+; X64-AVX2-NEXT:    retq
+;
+; X64-AVX512F-LABEL: length128_eq_const:
+; X64-AVX512F:       # %bb.0:
+; X64-AVX512F-NEXT:    vmovdqu64 (%rdi), %zmm0
+; X64-AVX512F-NEXT:    vmovdqu64 64(%rdi), %zmm1
+; X64-AVX512F-NEXT:    vpcmpeqd {{.*}}(%rip), %zmm0, %k1
+; X64-AVX512F-NEXT:    vpcmpeqd .L.str+{{.*}}(%rip), %zmm1, %k0 {%k1}
+; X64-AVX512F-NEXT:    kortestw %k0, %k0
+; X64-AVX512F-NEXT:    setb %al
+; X64-AVX512F-NEXT:    vzeroupper
+; X64-AVX512F-NEXT:    retq
+;
+; X64-AVX512BW-LABEL: length128_eq_const:
+; X64-AVX512BW:       # %bb.0:
+; X64-AVX512BW-NEXT:    vmovdqu64 (%rdi), %zmm0
+; X64-AVX512BW-NEXT:    vmovdqu64 64(%rdi), %zmm1
+; X64-AVX512BW-NEXT:    vpcmpeqb {{.*}}(%rip), %zmm0, %k1
+; X64-AVX512BW-NEXT:    vpcmpeqb .L.str+{{.*}}(%rip), %zmm1, %k0 {%k1}
+; X64-AVX512BW-NEXT:    kortestq %k0, %k0
+; X64-AVX512BW-NEXT:    setb %al
+; X64-AVX512BW-NEXT:    vzeroupper
+; X64-AVX512BW-NEXT:    retq
+  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 128) nounwind
+  %c = icmp eq i32 %m, 0
+  ret i1 %c
+}
+
+define i32 @length192(i8* %X, i8* %Y) nounwind {
+; X86-LABEL: length192:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $192
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    retl
+;
+; X64-LABEL: length192:
+; X64:       # %bb.0:
+; X64-NEXT:    movl $192, %edx
+; X64-NEXT:    jmp memcmp # TAILCALL
+  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 192) nounwind
+  ret i32 %m
+}
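
Only the plain i32-returning variants can be emitted as
`jmp memcmp # TAILCALL`: the result is forwarded unchanged, so the frame can
be dropped before the jump. The predicate variants must regain control to run
test/setcc on the return value, hence the real callq bracketed by
`pushq %rax` / `popq %rcx`, which exists purely to keep the stack 16-byte
aligned at the call site (the popped value is dead). A minimal sketch of the
two shapes (function names are illustrative):

    declare i32 @memcmp(i8*, i8*, i64)

    ; Forwarded result: eligible for the tail-call jmp.
    define i32 @fwd(i8* %x, i8* %y) nounwind {
      %m = tail call i32 @memcmp(i8* %x, i8* %y, i64 192)
      ret i32 %m
    }

    ; Consumed result: needs callq plus testl/setg afterwards.
    define i1 @pred(i8* %x, i8* %y) nounwind {
      %m = tail call i32 @memcmp(i8* %x, i8* %y, i64 192)
      %c = icmp sgt i32 %m, 0
      ret i1 %c
    }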
+
+define i1 @length192_eq(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length192_eq:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $192
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    setne %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: length192_eq:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movl $192, %edx
+; X64-NEXT:    callq memcmp
+; X64-NEXT:    testl %eax, %eax
+; X64-NEXT:    setne %al
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 192) nounwind
+  %cmp = icmp ne i32 %call, 0
+  ret i1 %cmp
+}
+
+define i1 @length192_lt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length192_lt:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $192
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    shrl $31, %eax
+; X86-NEXT:    # kill: def $al killed $al killed $eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: length192_lt:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movl $192, %edx
+; X64-NEXT:    callq memcmp
+; X64-NEXT:    shrl $31, %eax
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 192) nounwind
+  %cmp = icmp slt i32 %call, 0
+  ret i1 %cmp
+}
+
+define i1 @length192_gt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length192_gt:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $192
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    setg %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: length192_gt:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movl $192, %edx
+; X64-NEXT:    callq memcmp
+; X64-NEXT:    testl %eax, %eax
+; X64-NEXT:    setg %al
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 192) nounwind
+  %cmp = icmp sgt i32 %call, 0
+  ret i1 %cmp
+}
+
+define i1 @length192_eq_const(i8* %X) nounwind {
+; X86-LABEL: length192_eq_const:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $192
+; X86-NEXT:    pushl $.L.str
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    sete %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: length192_eq_const:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movl $.L.str, %esi
+; X64-NEXT:    movl $192, %edx
+; X64-NEXT:    callq memcmp
+; X64-NEXT:    testl %eax, %eax
+; X64-NEXT:    sete %al
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 192) nounwind
+  %c = icmp eq i32 %m, 0
+  ret i1 %c
+}
+
+define i32 @length255(i8* %X, i8* %Y) nounwind {
+; X86-LABEL: length255:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $255
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    retl
+;
+; X64-LABEL: length255:
+; X64:       # %bb.0:
+; X64-NEXT:    movl $255, %edx
+; X64-NEXT:    jmp memcmp # TAILCALL
+  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 255) nounwind
+  ret i32 %m
+}
+
+define i1 @length255_eq(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length255_eq:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $255
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    setne %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: length255_eq:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movl $255, %edx
+; X64-NEXT:    callq memcmp
+; X64-NEXT:    testl %eax, %eax
+; X64-NEXT:    setne %al
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 255) nounwind
+  %cmp = icmp ne i32 %call, 0
+  ret i1 %cmp
+}
+
+define i1 @length255_lt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length255_lt:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $255
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    shrl $31, %eax
+; X86-NEXT:    # kill: def $al killed $al killed $eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: length255_lt:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movl $255, %edx
+; X64-NEXT:    callq memcmp
+; X64-NEXT:    shrl $31, %eax
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 255) nounwind
+  %cmp = icmp slt i32 %call, 0
+  ret i1 %cmp
+}
+
+define i1 @length255_gt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length255_gt:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $255
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    setg %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: length255_gt:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movl $255, %edx
+; X64-NEXT:    callq memcmp
+; X64-NEXT:    testl %eax, %eax
+; X64-NEXT:    setg %al
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 255) nounwind
+  %cmp = icmp sgt i32 %call, 0
+  ret i1 %cmp
+}
+
+define i1 @length255_eq_const(i8* %X) nounwind {
+; X86-LABEL: length255_eq_const:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $255
+; X86-NEXT:    pushl $.L.str
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    sete %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: length255_eq_const:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movl $.L.str, %esi
+; X64-NEXT:    movl $255, %edx
+; X64-NEXT:    callq memcmp
+; X64-NEXT:    testl %eax, %eax
+; X64-NEXT:    sete %al
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 255) nounwind
+  %c = icmp eq i32 %m, 0
+  ret i1 %c
+}
+
+define i32 @length256(i8* %X, i8* %Y) nounwind {
+; X86-LABEL: length256:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $256 # imm = 0x100
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    retl
+;
+; X64-LABEL: length256:
+; X64:       # %bb.0:
+; X64-NEXT:    movl $256, %edx # imm = 0x100
+; X64-NEXT:    jmp memcmp # TAILCALL
+  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 256) nounwind
+  ret i32 %m
+}
+
+define i1 @length256_eq(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length256_eq:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $256 # imm = 0x100
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    setne %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: length256_eq:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movl $256, %edx # imm = 0x100
+; X64-NEXT:    callq memcmp
+; X64-NEXT:    testl %eax, %eax
+; X64-NEXT:    setne %al
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 256) nounwind
+  %cmp = icmp ne i32 %call, 0
+  ret i1 %cmp
+}
+
+define i1 @length256_lt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length256_lt:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $256 # imm = 0x100
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    shrl $31, %eax
+; X86-NEXT:    # kill: def $al killed $al killed $eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: length256_lt:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movl $256, %edx # imm = 0x100
+; X64-NEXT:    callq memcmp
+; X64-NEXT:    shrl $31, %eax
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 256) nounwind
+  %cmp = icmp slt i32 %call, 0
+  ret i1 %cmp
+}
+
+define i1 @length256_gt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length256_gt:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $256 # imm = 0x100
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    setg %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: length256_gt:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movl $256, %edx # imm = 0x100
+; X64-NEXT:    callq memcmp
+; X64-NEXT:    testl %eax, %eax
+; X64-NEXT:    setg %al
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 256) nounwind
+  %cmp = icmp sgt i32 %call, 0
+  ret i1 %cmp
+}
+
+define i1 @length256_eq_const(i8* %X) nounwind {
+; X86-LABEL: length256_eq_const:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $256 # imm = 0x100
+; X86-NEXT:    pushl $.L.str
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    sete %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: length256_eq_const:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movl $.L.str, %esi
+; X64-NEXT:    movl $256, %edx # imm = 0x100
+; X64-NEXT:    callq memcmp
+; X64-NEXT:    testl %eax, %eax
+; X64-NEXT:    sete %al
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 256) nounwind
+  %c = icmp eq i32 %m, 0
+  ret i1 %c
+}
+
+define i32 @length384(i8* %X, i8* %Y) nounwind {
+; X86-LABEL: length384:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $384 # imm = 0x180
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    retl
+;
+; X64-LABEL: length384:
+; X64:       # %bb.0:
+; X64-NEXT:    movl $384, %edx # imm = 0x180
+; X64-NEXT:    jmp memcmp # TAILCALL
+  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 384) nounwind
+  ret i32 %m
+}
+
+define i1 @length384_eq(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length384_eq:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $384 # imm = 0x180
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    setne %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: length384_eq:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movl $384, %edx # imm = 0x180
+; X64-NEXT:    callq memcmp
+; X64-NEXT:    testl %eax, %eax
+; X64-NEXT:    setne %al
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 384) nounwind
+  %cmp = icmp ne i32 %call, 0
+  ret i1 %cmp
+}
+
+define i1 @length384_lt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length384_lt:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $384 # imm = 0x180
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    shrl $31, %eax
+; X86-NEXT:    # kill: def $al killed $al killed $eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: length384_lt:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movl $384, %edx # imm = 0x180
+; X64-NEXT:    callq memcmp
+; X64-NEXT:    shrl $31, %eax
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 384) nounwind
+  %cmp = icmp slt i32 %call, 0
+  ret i1 %cmp
+}
+
+define i1 @length384_gt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length384_gt:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $384 # imm = 0x180
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    setg %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: length384_gt:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movl $384, %edx # imm = 0x180
+; X64-NEXT:    callq memcmp
+; X64-NEXT:    testl %eax, %eax
+; X64-NEXT:    setg %al
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 384) nounwind
+  %cmp = icmp sgt i32 %call, 0
+  ret i1 %cmp
+}
+
+define i1 @length384_eq_const(i8* %X) nounwind {
+; X86-LABEL: length384_eq_const:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $384 # imm = 0x180
+; X86-NEXT:    pushl $.L.str
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    sete %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: length384_eq_const:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movl $.L.str, %esi
+; X64-NEXT:    movl $384, %edx # imm = 0x180
+; X64-NEXT:    callq memcmp
+; X64-NEXT:    testl %eax, %eax
+; X64-NEXT:    sete %al
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 384) nounwind
+  %c = icmp eq i32 %m, 0
   ret i1 %c
 }
 
-define i32 @length64(i8* %X, i8* %Y) nounwind {
-; X86-LABEL: length64:
+define i32 @length511(i8* %X, i8* %Y) nounwind {
+; X86-LABEL: length511:
 ; X86:       # %bb.0:
 ; X86-NEXT:    pushl $0
-; X86-NEXT:    pushl $64
+; X86-NEXT:    pushl $511 # imm = 0x1FF
 ; X86-NEXT:    pushl {{[0-9]+}}(%esp)
 ; X86-NEXT:    pushl {{[0-9]+}}(%esp)
 ; X86-NEXT:    calll memcmp
 ; X86-NEXT:    addl $16, %esp
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: length64:
+; X64-LABEL: length511:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl $64, %edx
+; X64-NEXT:    movl $511, %edx # imm = 0x1FF
 ; X64-NEXT:    jmp memcmp # TAILCALL
-  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 64) nounwind
+  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 511) nounwind
   ret i32 %m
 }
 
-define i1 @length64_eq(i8* %x, i8* %y) nounwind {
-; X86-LABEL: length64_eq:
+define i1 @length511_eq(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length511_eq:
 ; X86:       # %bb.0:
 ; X86-NEXT:    pushl $0
-; X86-NEXT:    pushl $64
+; X86-NEXT:    pushl $511 # imm = 0x1FF
 ; X86-NEXT:    pushl {{[0-9]+}}(%esp)
 ; X86-NEXT:    pushl {{[0-9]+}}(%esp)
 ; X86-NEXT:    calll memcmp
@@ -1569,65 +4619,79 @@ define i1 @length64_eq(i8* %x, i8* %y) nounwind {
 ; X86-NEXT:    setne %al
 ; X86-NEXT:    retl
 ;
-; X64-SSE2-LABEL: length64_eq:
-; X64-SSE2:       # %bb.0:
-; X64-SSE2-NEXT:    pushq %rax
-; X64-SSE2-NEXT:    movl $64, %edx
-; X64-SSE2-NEXT:    callq memcmp
-; X64-SSE2-NEXT:    testl %eax, %eax
-; X64-SSE2-NEXT:    setne %al
-; X64-SSE2-NEXT:    popq %rcx
-; X64-SSE2-NEXT:    retq
-;
-; X64-AVX1-LABEL: length64_eq:
-; X64-AVX1:       # %bb.0:
-; X64-AVX1-NEXT:    pushq %rax
-; X64-AVX1-NEXT:    movl $64, %edx
-; X64-AVX1-NEXT:    callq memcmp
-; X64-AVX1-NEXT:    testl %eax, %eax
-; X64-AVX1-NEXT:    setne %al
-; X64-AVX1-NEXT:    popq %rcx
-; X64-AVX1-NEXT:    retq
-;
-; X64-AVX2-LABEL: length64_eq:
-; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    vmovdqu (%rdi), %ymm0
-; X64-AVX2-NEXT:    vmovdqu 32(%rdi), %ymm1
-; X64-AVX2-NEXT:    vpxor 32(%rsi), %ymm1, %ymm1
-; X64-AVX2-NEXT:    vpxor (%rsi), %ymm0, %ymm0
-; X64-AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT:    vptest %ymm0, %ymm0
-; X64-AVX2-NEXT:    setne %al
-; X64-AVX2-NEXT:    vzeroupper
-; X64-AVX2-NEXT:    retq
+; X64-LABEL: length511_eq:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movl $511, %edx # imm = 0x1FF
+; X64-NEXT:    callq memcmp
+; X64-NEXT:    testl %eax, %eax
+; X64-NEXT:    setne %al
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 511) nounwind
+  %cmp = icmp ne i32 %call, 0
+  ret i1 %cmp
+}
+
+define i1 @length511_lt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length511_lt:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $511 # imm = 0x1FF
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    shrl $31, %eax
+; X86-NEXT:    # kill: def $al killed $al killed $eax
+; X86-NEXT:    retl
 ;
-; X64-AVX512F-LABEL: length64_eq:
-; X64-AVX512F:       # %bb.0:
-; X64-AVX512F-NEXT:    vmovdqu64 (%rdi), %zmm0
-; X64-AVX512F-NEXT:    vpcmpeqd (%rsi), %zmm0, %k0
-; X64-AVX512F-NEXT:    kortestw %k0, %k0
-; X64-AVX512F-NEXT:    setae %al
-; X64-AVX512F-NEXT:    vzeroupper
-; X64-AVX512F-NEXT:    retq
+; X64-LABEL: length511_lt:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movl $511, %edx # imm = 0x1FF
+; X64-NEXT:    callq memcmp
+; X64-NEXT:    shrl $31, %eax
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 511) nounwind
+  %cmp = icmp slt i32 %call, 0
+  ret i1 %cmp
+}
+
+define i1 @length511_gt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length511_gt:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $511 # imm = 0x1FF
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    setg %al
+; X86-NEXT:    retl
 ;
-; X64-AVX512BW-LABEL: length64_eq:
-; X64-AVX512BW:       # %bb.0:
-; X64-AVX512BW-NEXT:    vmovdqu64 (%rdi), %zmm0
-; X64-AVX512BW-NEXT:    vpcmpeqb (%rsi), %zmm0, %k0
-; X64-AVX512BW-NEXT:    kortestq %k0, %k0
-; X64-AVX512BW-NEXT:    setae %al
-; X64-AVX512BW-NEXT:    vzeroupper
-; X64-AVX512BW-NEXT:    retq
-  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 64) nounwind
-  %cmp = icmp ne i32 %call, 0
+; X64-LABEL: length511_gt:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movl $511, %edx # imm = 0x1FF
+; X64-NEXT:    callq memcmp
+; X64-NEXT:    testl %eax, %eax
+; X64-NEXT:    setg %al
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 511) nounwind
+  %cmp = icmp sgt i32 %call, 0
   ret i1 %cmp
 }
 
-define i1 @length64_eq_const(i8* %X) nounwind {
-; X86-LABEL: length64_eq_const:
+define i1 @length511_eq_const(i8* %X) nounwind {
+; X86-LABEL: length511_eq_const:
 ; X86:       # %bb.0:
 ; X86-NEXT:    pushl $0
-; X86-NEXT:    pushl $64
+; X86-NEXT:    pushl $511 # imm = 0x1FF
 ; X86-NEXT:    pushl $.L.str
 ; X86-NEXT:    pushl {{[0-9]+}}(%esp)
 ; X86-NEXT:    calll memcmp
@@ -1636,58 +4700,145 @@ define i1 @length64_eq_const(i8* %X) nounwind {
 ; X86-NEXT:    sete %al
 ; X86-NEXT:    retl
 ;
-; X64-SSE2-LABEL: length64_eq_const:
-; X64-SSE2:       # %bb.0:
-; X64-SSE2-NEXT:    pushq %rax
-; X64-SSE2-NEXT:    movl $.L.str, %esi
-; X64-SSE2-NEXT:    movl $64, %edx
-; X64-SSE2-NEXT:    callq memcmp
-; X64-SSE2-NEXT:    testl %eax, %eax
-; X64-SSE2-NEXT:    sete %al
-; X64-SSE2-NEXT:    popq %rcx
-; X64-SSE2-NEXT:    retq
+; X64-LABEL: length511_eq_const:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movl $.L.str, %esi
+; X64-NEXT:    movl $511, %edx # imm = 0x1FF
+; X64-NEXT:    callq memcmp
+; X64-NEXT:    testl %eax, %eax
+; X64-NEXT:    sete %al
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 511) nounwind
+  %c = icmp eq i32 %m, 0
+  ret i1 %c
+}
+
+define i32 @length512(i8* %X, i8* %Y) nounwind {
+; X86-LABEL: length512:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $512 # imm = 0x200
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    retl
 ;
-; X64-AVX1-LABEL: length64_eq_const:
-; X64-AVX1:       # %bb.0:
-; X64-AVX1-NEXT:    pushq %rax
-; X64-AVX1-NEXT:    movl $.L.str, %esi
-; X64-AVX1-NEXT:    movl $64, %edx
-; X64-AVX1-NEXT:    callq memcmp
-; X64-AVX1-NEXT:    testl %eax, %eax
-; X64-AVX1-NEXT:    sete %al
-; X64-AVX1-NEXT:    popq %rcx
-; X64-AVX1-NEXT:    retq
+; X64-LABEL: length512:
+; X64:       # %bb.0:
+; X64-NEXT:    movl $512, %edx # imm = 0x200
+; X64-NEXT:    jmp memcmp # TAILCALL
+  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 512) nounwind
+  ret i32 %m
+}
+
+define i1 @length512_eq(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length512_eq:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $512 # imm = 0x200
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    setne %al
+; X86-NEXT:    retl
 ;
-; X64-AVX2-LABEL: length64_eq_const:
-; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    vmovdqu (%rdi), %ymm0
-; X64-AVX2-NEXT:    vmovdqu 32(%rdi), %ymm1
-; X64-AVX2-NEXT:    vpxor {{.*}}(%rip), %ymm1, %ymm1
-; X64-AVX2-NEXT:    vpxor {{.*}}(%rip), %ymm0, %ymm0
-; X64-AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT:    vptest %ymm0, %ymm0
-; X64-AVX2-NEXT:    sete %al
-; X64-AVX2-NEXT:    vzeroupper
-; X64-AVX2-NEXT:    retq
+; X64-LABEL: length512_eq:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movl $512, %edx # imm = 0x200
+; X64-NEXT:    callq memcmp
+; X64-NEXT:    testl %eax, %eax
+; X64-NEXT:    setne %al
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 512) nounwind
+  %cmp = icmp ne i32 %call, 0
+  ret i1 %cmp
+}
+
+define i1 @length512_lt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length512_lt:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $512 # imm = 0x200
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    shrl $31, %eax
+; X86-NEXT:    # kill: def $al killed $al killed $eax
+; X86-NEXT:    retl
 ;
-; X64-AVX512F-LABEL: length64_eq_const:
-; X64-AVX512F:       # %bb.0:
-; X64-AVX512F-NEXT:    vmovdqu64 (%rdi), %zmm0
-; X64-AVX512F-NEXT:    vpcmpeqd {{.*}}(%rip), %zmm0, %k0
-; X64-AVX512F-NEXT:    kortestw %k0, %k0
-; X64-AVX512F-NEXT:    setb %al
-; X64-AVX512F-NEXT:    vzeroupper
-; X64-AVX512F-NEXT:    retq
+; X64-LABEL: length512_lt:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movl $512, %edx # imm = 0x200
+; X64-NEXT:    callq memcmp
+; X64-NEXT:    shrl $31, %eax
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 512) nounwind
+  %cmp = icmp slt i32 %call, 0
+  ret i1 %cmp
+}
+
+define i1 @length512_gt(i8* %x, i8* %y) nounwind {
+; X86-LABEL: length512_gt:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $512 # imm = 0x200
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    setg %al
+; X86-NEXT:    retl
 ;
-; X64-AVX512BW-LABEL: length64_eq_const:
-; X64-AVX512BW:       # %bb.0:
-; X64-AVX512BW-NEXT:    vmovdqu64 (%rdi), %zmm0
-; X64-AVX512BW-NEXT:    vpcmpeqb {{.*}}(%rip), %zmm0, %k0
-; X64-AVX512BW-NEXT:    kortestq %k0, %k0
-; X64-AVX512BW-NEXT:    setb %al
-; X64-AVX512BW-NEXT:    vzeroupper
-; X64-AVX512BW-NEXT:    retq
-  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 64) nounwind
+; X64-LABEL: length512_gt:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movl $512, %edx # imm = 0x200
+; X64-NEXT:    callq memcmp
+; X64-NEXT:    testl %eax, %eax
+; X64-NEXT:    setg %al
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 512) nounwind
+  %cmp = icmp sgt i32 %call, 0
+  ret i1 %cmp
+}
+
+define i1 @length512_eq_const(i8* %X) nounwind {
+; X86-LABEL: length512_eq_const:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $512 # imm = 0x200
+; X86-NEXT:    pushl $.L.str
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    sete %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: length512_eq_const:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movl $.L.str, %esi
+; X64-NEXT:    movl $512, %edx # imm = 0x200
+; X64-NEXT:    callq memcmp
+; X64-NEXT:    testl %eax, %eax
+; X64-NEXT:    sete %al
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 0), i64 512) nounwind
   %c = icmp eq i32 %m, 0
   ret i1 %c
 }
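
Taken together, the new sizes pin down where the inline expansion stops under
each attribute set: with 512-bit vectors enabled, 128 bytes (two zmm loads) is
still expanded, while 192 bytes and up fall back to the libcall, consistent
with a two-load cap on the widest legal vector type. A reduced standalone test
exercising that boundary might look like the following sketch (the llc
invocation is an assumption, chosen so 512-bit vectors are available):

    ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw | FileCheck %s
    declare i32 @memcmp(i8*, i8*, i64)

    ; 128 bytes = two 64-byte loads: expanded inline under AVX512BW.
    ; CHECK-LABEL: eq128:
    ; CHECK: vpcmpeqb
    ; CHECK: kortestq
    define i1 @eq128(i8* %x, i8* %y) nounwind {
      %m = tail call i32 @memcmp(i8* %x, i8* %y, i64 128)
      %c = icmp eq i32 %m, 0
      ret i1 %c
    }

    ; 192 bytes would take a third load, so it stays a libcall.
    ; CHECK-LABEL: call192:
    ; CHECK: jmp memcmp
    define i32 @call192(i8* %x, i8* %y) nounwind {
      %m = tail call i32 @memcmp(i8* %x, i8* %y, i64 192)
      ret i32 %m
    }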