[llvm] r309711 - [CGP] use narrower types in memcmp expansion when possible

Sanjay Patel via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 1 10:24:54 PDT 2017


Author: spatel
Date: Tue Aug  1 10:24:54 2017
New Revision: 309711

URL: http://llvm.org/viewvc/llvm-project?rev=309711&view=rev
Log:
[CGP] use narrower types in memcmp expansion when possible

This only affects very small memcmp on x86 for now, but it
will become more important if we allow vector-sized load and
compares.

Modified:
    llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp
    llvm/trunk/test/CodeGen/X86/memcmp-optsize.ll
    llvm/trunk/test/CodeGen/X86/memcmp.ll
    llvm/trunk/test/Transforms/CodeGenPrepare/X86/memcmp.ll

Modified: llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp?rev=309711&r1=309710&r2=309711&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp (original)
+++ llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp Tue Aug  1 10:24:54 2017
@@ -2271,8 +2271,12 @@ static bool expandMemCmp(CallInst *CI, c
     return false;
   }
 
-  // Early exit from expansion if size greater than max bytes to load.
+  // Scale the max size down if the target can load more bytes than we need.
   uint64_t SizeVal = SizeCast->getZExtValue();
+  if (MaxLoadSize > SizeVal)
+    MaxLoadSize = 1 << SizeCast->getValue().logBase2();
+
+  // Calculate how many load pairs are needed for the constant size.
   unsigned NumLoads = 0;
   unsigned RemainingSize = SizeVal;
   unsigned LoadSize = MaxLoadSize;
@@ -2282,6 +2286,7 @@ static bool expandMemCmp(CallInst *CI, c
     LoadSize = LoadSize / 2;
   }
 
+  // Don't expand if this will require more loads than desired by the target.
   if (NumLoads > TLI->getMaxExpandSizeMemcmp(CI->getFunction()->optForSize())) {
     NumMemCmpGreaterThanMax++;
     return false;

Modified: llvm/trunk/test/CodeGen/X86/memcmp-optsize.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/memcmp-optsize.ll?rev=309711&r1=309710&r2=309711&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/memcmp-optsize.ll (original)
+++ llvm/trunk/test/CodeGen/X86/memcmp-optsize.ll Tue Aug  1 10:24:54 2017
@@ -117,9 +117,7 @@ define i32 @length3(i8* %X, i8* %Y) noun
 ; X86-NEXT:    movzwl (%ecx), %esi
 ; X86-NEXT:    rolw $8, %dx
 ; X86-NEXT:    rolw $8, %si
-; X86-NEXT:    movzwl %dx, %edx
-; X86-NEXT:    movzwl %si, %esi
-; X86-NEXT:    cmpl %esi, %edx
+; X86-NEXT:    cmpw %si, %dx
 ; X86-NEXT:    jne .LBB4_1
 ; X86-NEXT:  # BB#2: # %loadbb1
 ; X86-NEXT:    movzbl 2(%eax), %eax
@@ -131,7 +129,7 @@ define i32 @length3(i8* %X, i8* %Y) noun
 ; X86-NEXT:    incl %ecx
 ; X86-NEXT:    xorl %eax, %eax
 ; X86-NEXT:    decl %eax
-; X86-NEXT:    cmpl %esi, %edx
+; X86-NEXT:    cmpw %si, %dx
 ; X86-NEXT:    cmovael %ecx, %eax
 ; X86-NEXT:  .LBB4_3: # %endblock
 ; X86-NEXT:    popl %esi
@@ -143,9 +141,7 @@ define i32 @length3(i8* %X, i8* %Y) noun
 ; X64-NEXT:    movzwl (%rsi), %ecx
 ; X64-NEXT:    rolw $8, %ax
 ; X64-NEXT:    rolw $8, %cx
-; X64-NEXT:    movzwl %ax, %eax
-; X64-NEXT:    movzwl %cx, %ecx
-; X64-NEXT:    cmpq %rcx, %rax
+; X64-NEXT:    cmpw %cx, %ax
 ; X64-NEXT:    jne .LBB4_1
 ; X64-NEXT:  # BB#2: # %loadbb1
 ; X64-NEXT:    movzbl 2(%rdi), %eax
@@ -306,7 +302,7 @@ define i32 @length5(i8* %X, i8* %Y) noun
 ; X64-NEXT:    movl (%rsi), %ecx
 ; X64-NEXT:    bswapl %eax
 ; X64-NEXT:    bswapl %ecx
-; X64-NEXT:    cmpq %rcx, %rax
+; X64-NEXT:    cmpl %ecx, %eax
 ; X64-NEXT:    jne .LBB9_1
 ; X64-NEXT:  # BB#2: # %loadbb1
 ; X64-NEXT:    movzbl 4(%rdi), %eax

Modified: llvm/trunk/test/CodeGen/X86/memcmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/memcmp.ll?rev=309711&r1=309710&r2=309711&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/memcmp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/memcmp.ll Tue Aug  1 10:24:54 2017
@@ -117,9 +117,7 @@ define i32 @length3(i8* %X, i8* %Y) noun
 ; X86-NEXT:    movzwl (%ecx), %esi
 ; X86-NEXT:    rolw $8, %dx
 ; X86-NEXT:    rolw $8, %si
-; X86-NEXT:    movzwl %dx, %edx
-; X86-NEXT:    movzwl %si, %esi
-; X86-NEXT:    cmpl %esi, %edx
+; X86-NEXT:    cmpw %si, %dx
 ; X86-NEXT:    jne .LBB4_1
 ; X86-NEXT:  # BB#2: # %loadbb1
 ; X86-NEXT:    movzbl 2(%eax), %eax
@@ -140,9 +138,7 @@ define i32 @length3(i8* %X, i8* %Y) noun
 ; X64-NEXT:    movzwl (%rsi), %ecx
 ; X64-NEXT:    rolw $8, %ax
 ; X64-NEXT:    rolw $8, %cx
-; X64-NEXT:    movzwl %ax, %eax
-; X64-NEXT:    movzwl %cx, %ecx
-; X64-NEXT:    cmpq %rcx, %rax
+; X64-NEXT:    cmpw %cx, %ax
 ; X64-NEXT:    jne .LBB4_1
 ; X64-NEXT:  # BB#2: # %loadbb1
 ; X64-NEXT:    movzbl 2(%rdi), %eax
@@ -299,7 +295,7 @@ define i32 @length5(i8* %X, i8* %Y) noun
 ; X64-NEXT:    movl (%rsi), %ecx
 ; X64-NEXT:    bswapl %eax
 ; X64-NEXT:    bswapl %ecx
-; X64-NEXT:    cmpq %rcx, %rax
+; X64-NEXT:    cmpl %ecx, %eax
 ; X64-NEXT:    jne .LBB9_1
 ; X64-NEXT:  # BB#2: # %loadbb1
 ; X64-NEXT:    movzbl 4(%rdi), %eax

Modified: llvm/trunk/test/Transforms/CodeGenPrepare/X86/memcmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/X86/memcmp.ll?rev=309711&r1=309710&r2=309711&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/X86/memcmp.ll (original)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/X86/memcmp.ll Tue Aug  1 10:24:54 2017
@@ -22,63 +22,32 @@ define i32 @cmp2(i8* nocapture readonly
 }
 
 define i32 @cmp3(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
-; X32-LABEL: @cmp3(
-; X32-NEXT:  loadbb:
-; X32-NEXT:    [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i16*
-; X32-NEXT:    [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i16*
-; X32-NEXT:    [[TMP2:%.*]] = load i16, i16* [[TMP0]]
-; X32-NEXT:    [[TMP3:%.*]] = load i16, i16* [[TMP1]]
-; X32-NEXT:    [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
-; X32-NEXT:    [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
-; X32-NEXT:    [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
-; X32-NEXT:    [[TMP7:%.*]] = zext i16 [[TMP5]] to i32
-; X32-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[TMP6]], [[TMP7]]
-; X32-NEXT:    br i1 [[TMP8]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
-; X32:       res_block:
-; X32-NEXT:    [[TMP9:%.*]] = icmp ult i32 [[TMP6]], [[TMP7]]
-; X32-NEXT:    [[TMP10:%.*]] = select i1 [[TMP9]], i32 -1, i32 1
-; X32-NEXT:    br label [[ENDBLOCK:%.*]]
-; X32:       loadbb1:
-; X32-NEXT:    [[TMP11:%.*]] = getelementptr i8, i8* [[X]], i8 2
-; X32-NEXT:    [[TMP12:%.*]] = getelementptr i8, i8* [[Y]], i8 2
-; X32-NEXT:    [[TMP13:%.*]] = load i8, i8* [[TMP11]]
-; X32-NEXT:    [[TMP14:%.*]] = load i8, i8* [[TMP12]]
-; X32-NEXT:    [[TMP15:%.*]] = zext i8 [[TMP13]] to i32
-; X32-NEXT:    [[TMP16:%.*]] = zext i8 [[TMP14]] to i32
-; X32-NEXT:    [[TMP17:%.*]] = sub i32 [[TMP15]], [[TMP16]]
-; X32-NEXT:    br label [[ENDBLOCK]]
-; X32:       endblock:
-; X32-NEXT:    [[PHI_RES:%.*]] = phi i32 [ [[TMP17]], [[LOADBB1]] ], [ [[TMP10]], [[RES_BLOCK]] ]
-; X32-NEXT:    ret i32 [[PHI_RES]]
-;
-; X64-LABEL: @cmp3(
-; X64-NEXT:  loadbb:
-; X64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i16*
-; X64-NEXT:    [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i16*
-; X64-NEXT:    [[TMP2:%.*]] = load i16, i16* [[TMP0]]
-; X64-NEXT:    [[TMP3:%.*]] = load i16, i16* [[TMP1]]
-; X64-NEXT:    [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
-; X64-NEXT:    [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
-; X64-NEXT:    [[TMP6:%.*]] = zext i16 [[TMP4]] to i64
-; X64-NEXT:    [[TMP7:%.*]] = zext i16 [[TMP5]] to i64
-; X64-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[TMP6]], [[TMP7]]
-; X64-NEXT:    br i1 [[TMP8]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
-; X64:       res_block:
-; X64-NEXT:    [[TMP9:%.*]] = icmp ult i64 [[TMP6]], [[TMP7]]
-; X64-NEXT:    [[TMP10:%.*]] = select i1 [[TMP9]], i32 -1, i32 1
-; X64-NEXT:    br label [[ENDBLOCK:%.*]]
-; X64:       loadbb1:
-; X64-NEXT:    [[TMP11:%.*]] = getelementptr i8, i8* [[X]], i8 2
-; X64-NEXT:    [[TMP12:%.*]] = getelementptr i8, i8* [[Y]], i8 2
-; X64-NEXT:    [[TMP13:%.*]] = load i8, i8* [[TMP11]]
-; X64-NEXT:    [[TMP14:%.*]] = load i8, i8* [[TMP12]]
-; X64-NEXT:    [[TMP15:%.*]] = zext i8 [[TMP13]] to i32
-; X64-NEXT:    [[TMP16:%.*]] = zext i8 [[TMP14]] to i32
-; X64-NEXT:    [[TMP17:%.*]] = sub i32 [[TMP15]], [[TMP16]]
-; X64-NEXT:    br label [[ENDBLOCK]]
-; X64:       endblock:
-; X64-NEXT:    [[PHI_RES:%.*]] = phi i32 [ [[TMP17]], [[LOADBB1]] ], [ [[TMP10]], [[RES_BLOCK]] ]
-; X64-NEXT:    ret i32 [[PHI_RES]]
+; ALL-LABEL: @cmp3(
+; ALL-NEXT:  loadbb:
+; ALL-NEXT:    [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i16*
+; ALL-NEXT:    [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i16*
+; ALL-NEXT:    [[TMP2:%.*]] = load i16, i16* [[TMP0]]
+; ALL-NEXT:    [[TMP3:%.*]] = load i16, i16* [[TMP1]]
+; ALL-NEXT:    [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; ALL-NEXT:    [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
+; ALL-NEXT:    [[TMP6:%.*]] = icmp eq i16 [[TMP4]], [[TMP5]]
+; ALL-NEXT:    br i1 [[TMP6]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; ALL:       res_block:
+; ALL-NEXT:    [[TMP7:%.*]] = icmp ult i16 [[TMP4]], [[TMP5]]
+; ALL-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1
+; ALL-NEXT:    br label [[ENDBLOCK:%.*]]
+; ALL:       loadbb1:
+; ALL-NEXT:    [[TMP9:%.*]] = getelementptr i8, i8* [[X]], i8 2
+; ALL-NEXT:    [[TMP10:%.*]] = getelementptr i8, i8* [[Y]], i8 2
+; ALL-NEXT:    [[TMP11:%.*]] = load i8, i8* [[TMP9]]
+; ALL-NEXT:    [[TMP12:%.*]] = load i8, i8* [[TMP10]]
+; ALL-NEXT:    [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; ALL-NEXT:    [[TMP14:%.*]] = zext i8 [[TMP12]] to i32
+; ALL-NEXT:    [[TMP15:%.*]] = sub i32 [[TMP13]], [[TMP14]]
+; ALL-NEXT:    br label [[ENDBLOCK]]
+; ALL:       endblock:
+; ALL-NEXT:    [[PHI_RES:%.*]] = phi i32 [ [[TMP15]], [[LOADBB1]] ], [ [[TMP8]], [[RES_BLOCK]] ]
+; ALL-NEXT:    ret i32 [[PHI_RES]]
 ;
   %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 3)
   ret i32 %call
@@ -104,134 +73,70 @@ define i32 @cmp4(i8* nocapture readonly
 }
 
 define i32 @cmp5(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
-; X32-LABEL: @cmp5(
-; X32-NEXT:  loadbb:
-; X32-NEXT:    [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32*
-; X32-NEXT:    [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X32-NEXT:    [[TMP2:%.*]] = load i32, i32* [[TMP0]]
-; X32-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; X32-NEXT:    [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
-; X32-NEXT:    [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
-; X32-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[TMP4]], [[TMP5]]
-; X32-NEXT:    br i1 [[TMP6]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
-; X32:       res_block:
-; X32-NEXT:    [[TMP7:%.*]] = icmp ult i32 [[TMP4]], [[TMP5]]
-; X32-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1
-; X32-NEXT:    br label [[ENDBLOCK:%.*]]
-; X32:       loadbb1:
-; X32-NEXT:    [[TMP9:%.*]] = getelementptr i8, i8* [[X]], i8 4
-; X32-NEXT:    [[TMP10:%.*]] = getelementptr i8, i8* [[Y]], i8 4
-; X32-NEXT:    [[TMP11:%.*]] = load i8, i8* [[TMP9]]
-; X32-NEXT:    [[TMP12:%.*]] = load i8, i8* [[TMP10]]
-; X32-NEXT:    [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
-; X32-NEXT:    [[TMP14:%.*]] = zext i8 [[TMP12]] to i32
-; X32-NEXT:    [[TMP15:%.*]] = sub i32 [[TMP13]], [[TMP14]]
-; X32-NEXT:    br label [[ENDBLOCK]]
-; X32:       endblock:
-; X32-NEXT:    [[PHI_RES:%.*]] = phi i32 [ [[TMP15]], [[LOADBB1]] ], [ [[TMP8]], [[RES_BLOCK]] ]
-; X32-NEXT:    ret i32 [[PHI_RES]]
-;
-; X64-LABEL: @cmp5(
-; X64-NEXT:  loadbb:
-; X64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32*
-; X64-NEXT:    [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X64-NEXT:    [[TMP2:%.*]] = load i32, i32* [[TMP0]]
-; X64-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; X64-NEXT:    [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
-; X64-NEXT:    [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
-; X64-NEXT:    [[TMP6:%.*]] = zext i32 [[TMP4]] to i64
-; X64-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP5]] to i64
-; X64-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[TMP6]], [[TMP7]]
-; X64-NEXT:    br i1 [[TMP8]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
-; X64:       res_block:
-; X64-NEXT:    [[TMP9:%.*]] = icmp ult i64 [[TMP6]], [[TMP7]]
-; X64-NEXT:    [[TMP10:%.*]] = select i1 [[TMP9]], i32 -1, i32 1
-; X64-NEXT:    br label [[ENDBLOCK:%.*]]
-; X64:       loadbb1:
-; X64-NEXT:    [[TMP11:%.*]] = getelementptr i8, i8* [[X]], i8 4
-; X64-NEXT:    [[TMP12:%.*]] = getelementptr i8, i8* [[Y]], i8 4
-; X64-NEXT:    [[TMP13:%.*]] = load i8, i8* [[TMP11]]
-; X64-NEXT:    [[TMP14:%.*]] = load i8, i8* [[TMP12]]
-; X64-NEXT:    [[TMP15:%.*]] = zext i8 [[TMP13]] to i32
-; X64-NEXT:    [[TMP16:%.*]] = zext i8 [[TMP14]] to i32
-; X64-NEXT:    [[TMP17:%.*]] = sub i32 [[TMP15]], [[TMP16]]
-; X64-NEXT:    br label [[ENDBLOCK]]
-; X64:       endblock:
-; X64-NEXT:    [[PHI_RES:%.*]] = phi i32 [ [[TMP17]], [[LOADBB1]] ], [ [[TMP10]], [[RES_BLOCK]] ]
-; X64-NEXT:    ret i32 [[PHI_RES]]
+; ALL-LABEL: @cmp5(
+; ALL-NEXT:  loadbb:
+; ALL-NEXT:    [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32*
+; ALL-NEXT:    [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32*
+; ALL-NEXT:    [[TMP2:%.*]] = load i32, i32* [[TMP0]]
+; ALL-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP1]]
+; ALL-NEXT:    [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; ALL-NEXT:    [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; ALL-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[TMP4]], [[TMP5]]
+; ALL-NEXT:    br i1 [[TMP6]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; ALL:       res_block:
+; ALL-NEXT:    [[TMP7:%.*]] = icmp ult i32 [[TMP4]], [[TMP5]]
+; ALL-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1
+; ALL-NEXT:    br label [[ENDBLOCK:%.*]]
+; ALL:       loadbb1:
+; ALL-NEXT:    [[TMP9:%.*]] = getelementptr i8, i8* [[X]], i8 4
+; ALL-NEXT:    [[TMP10:%.*]] = getelementptr i8, i8* [[Y]], i8 4
+; ALL-NEXT:    [[TMP11:%.*]] = load i8, i8* [[TMP9]]
+; ALL-NEXT:    [[TMP12:%.*]] = load i8, i8* [[TMP10]]
+; ALL-NEXT:    [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; ALL-NEXT:    [[TMP14:%.*]] = zext i8 [[TMP12]] to i32
+; ALL-NEXT:    [[TMP15:%.*]] = sub i32 [[TMP13]], [[TMP14]]
+; ALL-NEXT:    br label [[ENDBLOCK]]
+; ALL:       endblock:
+; ALL-NEXT:    [[PHI_RES:%.*]] = phi i32 [ [[TMP15]], [[LOADBB1]] ], [ [[TMP8]], [[RES_BLOCK]] ]
+; ALL-NEXT:    ret i32 [[PHI_RES]]
 ;
   %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 5)
   ret i32 %call
 }
 
 define i32 @cmp6(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
-; X32-LABEL: @cmp6(
-; X32-NEXT:  loadbb:
-; X32-NEXT:    [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32*
-; X32-NEXT:    [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X32-NEXT:    [[TMP2:%.*]] = load i32, i32* [[TMP0]]
-; X32-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; X32-NEXT:    [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
-; X32-NEXT:    [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
-; X32-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[TMP4]], [[TMP5]]
-; X32-NEXT:    br i1 [[TMP6]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
-; X32:       res_block:
-; X32-NEXT:    [[PHI_SRC1:%.*]] = phi i32 [ [[TMP4]], [[LOADBB:%.*]] ], [ [[TMP17:%.*]], [[LOADBB1]] ]
-; X32-NEXT:    [[PHI_SRC2:%.*]] = phi i32 [ [[TMP5]], [[LOADBB]] ], [ [[TMP18:%.*]], [[LOADBB1]] ]
-; X32-NEXT:    [[TMP7:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
-; X32-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1
-; X32-NEXT:    br label [[ENDBLOCK:%.*]]
-; X32:       loadbb1:
-; X32-NEXT:    [[TMP9:%.*]] = bitcast i8* [[X]] to i16*
-; X32-NEXT:    [[TMP10:%.*]] = bitcast i8* [[Y]] to i16*
-; X32-NEXT:    [[TMP11:%.*]] = getelementptr i16, i16* [[TMP9]], i16 2
-; X32-NEXT:    [[TMP12:%.*]] = getelementptr i16, i16* [[TMP10]], i16 2
-; X32-NEXT:    [[TMP13:%.*]] = load i16, i16* [[TMP11]]
-; X32-NEXT:    [[TMP14:%.*]] = load i16, i16* [[TMP12]]
-; X32-NEXT:    [[TMP15:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP13]])
-; X32-NEXT:    [[TMP16:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP14]])
-; X32-NEXT:    [[TMP17]] = zext i16 [[TMP15]] to i32
-; X32-NEXT:    [[TMP18]] = zext i16 [[TMP16]] to i32
-; X32-NEXT:    [[TMP19:%.*]] = icmp eq i32 [[TMP17]], [[TMP18]]
-; X32-NEXT:    br i1 [[TMP19]], label [[ENDBLOCK]], label [[RES_BLOCK]]
-; X32:       endblock:
-; X32-NEXT:    [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP8]], [[RES_BLOCK]] ]
-; X32-NEXT:    ret i32 [[PHI_RES]]
-;
-; X64-LABEL: @cmp6(
-; X64-NEXT:  loadbb:
-; X64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32*
-; X64-NEXT:    [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X64-NEXT:    [[TMP2:%.*]] = load i32, i32* [[TMP0]]
-; X64-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; X64-NEXT:    [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
-; X64-NEXT:    [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
-; X64-NEXT:    [[TMP6:%.*]] = zext i32 [[TMP4]] to i64
-; X64-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP5]] to i64
-; X64-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[TMP6]], [[TMP7]]
-; X64-NEXT:    br i1 [[TMP8]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
-; X64:       res_block:
-; X64-NEXT:    [[PHI_SRC1:%.*]] = phi i64 [ [[TMP6]], [[LOADBB:%.*]] ], [ [[TMP19:%.*]], [[LOADBB1]] ]
-; X64-NEXT:    [[PHI_SRC2:%.*]] = phi i64 [ [[TMP7]], [[LOADBB]] ], [ [[TMP20:%.*]], [[LOADBB1]] ]
-; X64-NEXT:    [[TMP9:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
-; X64-NEXT:    [[TMP10:%.*]] = select i1 [[TMP9]], i32 -1, i32 1
-; X64-NEXT:    br label [[ENDBLOCK:%.*]]
-; X64:       loadbb1:
-; X64-NEXT:    [[TMP11:%.*]] = bitcast i8* [[X]] to i16*
-; X64-NEXT:    [[TMP12:%.*]] = bitcast i8* [[Y]] to i16*
-; X64-NEXT:    [[TMP13:%.*]] = getelementptr i16, i16* [[TMP11]], i16 2
-; X64-NEXT:    [[TMP14:%.*]] = getelementptr i16, i16* [[TMP12]], i16 2
-; X64-NEXT:    [[TMP15:%.*]] = load i16, i16* [[TMP13]]
-; X64-NEXT:    [[TMP16:%.*]] = load i16, i16* [[TMP14]]
-; X64-NEXT:    [[TMP17:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP15]])
-; X64-NEXT:    [[TMP18:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP16]])
-; X64-NEXT:    [[TMP19]] = zext i16 [[TMP17]] to i64
-; X64-NEXT:    [[TMP20]] = zext i16 [[TMP18]] to i64
-; X64-NEXT:    [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
-; X64-NEXT:    br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
-; X64:       endblock:
-; X64-NEXT:    [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP10]], [[RES_BLOCK]] ]
-; X64-NEXT:    ret i32 [[PHI_RES]]
+; ALL-LABEL: @cmp6(
+; ALL-NEXT:  loadbb:
+; ALL-NEXT:    [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32*
+; ALL-NEXT:    [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32*
+; ALL-NEXT:    [[TMP2:%.*]] = load i32, i32* [[TMP0]]
+; ALL-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP1]]
+; ALL-NEXT:    [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; ALL-NEXT:    [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; ALL-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[TMP4]], [[TMP5]]
+; ALL-NEXT:    br i1 [[TMP6]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; ALL:       res_block:
+; ALL-NEXT:    [[PHI_SRC1:%.*]] = phi i32 [ [[TMP4]], [[LOADBB:%.*]] ], [ [[TMP17:%.*]], [[LOADBB1]] ]
+; ALL-NEXT:    [[PHI_SRC2:%.*]] = phi i32 [ [[TMP5]], [[LOADBB]] ], [ [[TMP18:%.*]], [[LOADBB1]] ]
+; ALL-NEXT:    [[TMP7:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; ALL-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1
+; ALL-NEXT:    br label [[ENDBLOCK:%.*]]
+; ALL:       loadbb1:
+; ALL-NEXT:    [[TMP9:%.*]] = bitcast i8* [[X]] to i16*
+; ALL-NEXT:    [[TMP10:%.*]] = bitcast i8* [[Y]] to i16*
+; ALL-NEXT:    [[TMP11:%.*]] = getelementptr i16, i16* [[TMP9]], i16 2
+; ALL-NEXT:    [[TMP12:%.*]] = getelementptr i16, i16* [[TMP10]], i16 2
+; ALL-NEXT:    [[TMP13:%.*]] = load i16, i16* [[TMP11]]
+; ALL-NEXT:    [[TMP14:%.*]] = load i16, i16* [[TMP12]]
+; ALL-NEXT:    [[TMP15:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP13]])
+; ALL-NEXT:    [[TMP16:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP14]])
+; ALL-NEXT:    [[TMP17]] = zext i16 [[TMP15]] to i32
+; ALL-NEXT:    [[TMP18]] = zext i16 [[TMP16]] to i32
+; ALL-NEXT:    [[TMP19:%.*]] = icmp eq i32 [[TMP17]], [[TMP18]]
+; ALL-NEXT:    br i1 [[TMP19]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; ALL:       endblock:
+; ALL-NEXT:    [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP8]], [[RES_BLOCK]] ]
+; ALL-NEXT:    ret i32 [[PHI_RES]]
 ;
   %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 6)
   ret i32 %call




More information about the llvm-commits mailing list