[llvm] r309711 - [CGP] use narrower types in memcmp expansion when possible
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 1 10:24:54 PDT 2017
Author: spatel
Date: Tue Aug 1 10:24:54 2017
New Revision: 309711
URL: http://llvm.org/viewvc/llvm-project?rev=309711&view=rev
Log:
[CGP] use narrower types in memcmp expansion when possible
This only affects very small memcmp calls on x86 for now, but it
will become more important if we allow vector-sized loads and
compares.
Modified:
llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp
llvm/trunk/test/CodeGen/X86/memcmp-optsize.ll
llvm/trunk/test/CodeGen/X86/memcmp.ll
llvm/trunk/test/Transforms/CodeGenPrepare/X86/memcmp.ll
Modified: llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp?rev=309711&r1=309710&r2=309711&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp (original)
+++ llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp Tue Aug 1 10:24:54 2017
@@ -2271,8 +2271,12 @@ static bool expandMemCmp(CallInst *CI, c
return false;
}
- // Early exit from expansion if size greater than max bytes to load.
+ // Scale the max size down if the target can load more bytes than we need.
uint64_t SizeVal = SizeCast->getZExtValue();
+ if (MaxLoadSize > SizeVal)
+ MaxLoadSize = 1 << SizeCast->getValue().logBase2();
+
+ // Calculate how many load pairs are needed for the constant size.
unsigned NumLoads = 0;
unsigned RemainingSize = SizeVal;
unsigned LoadSize = MaxLoadSize;
@@ -2282,6 +2286,7 @@ static bool expandMemCmp(CallInst *CI, c
LoadSize = LoadSize / 2;
}
+ // Don't expand if this will require more loads than desired by the target.
if (NumLoads > TLI->getMaxExpandSizeMemcmp(CI->getFunction()->optForSize())) {
NumMemCmpGreaterThanMax++;
return false;
Modified: llvm/trunk/test/CodeGen/X86/memcmp-optsize.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/memcmp-optsize.ll?rev=309711&r1=309710&r2=309711&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/memcmp-optsize.ll (original)
+++ llvm/trunk/test/CodeGen/X86/memcmp-optsize.ll Tue Aug 1 10:24:54 2017
@@ -117,9 +117,7 @@ define i32 @length3(i8* %X, i8* %Y) noun
; X86-NEXT: movzwl (%ecx), %esi
; X86-NEXT: rolw $8, %dx
; X86-NEXT: rolw $8, %si
-; X86-NEXT: movzwl %dx, %edx
-; X86-NEXT: movzwl %si, %esi
-; X86-NEXT: cmpl %esi, %edx
+; X86-NEXT: cmpw %si, %dx
; X86-NEXT: jne .LBB4_1
; X86-NEXT: # BB#2: # %loadbb1
; X86-NEXT: movzbl 2(%eax), %eax
@@ -131,7 +129,7 @@ define i32 @length3(i8* %X, i8* %Y) noun
; X86-NEXT: incl %ecx
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: decl %eax
-; X86-NEXT: cmpl %esi, %edx
+; X86-NEXT: cmpw %si, %dx
; X86-NEXT: cmovael %ecx, %eax
; X86-NEXT: .LBB4_3: # %endblock
; X86-NEXT: popl %esi
@@ -143,9 +141,7 @@ define i32 @length3(i8* %X, i8* %Y) noun
; X64-NEXT: movzwl (%rsi), %ecx
; X64-NEXT: rolw $8, %ax
; X64-NEXT: rolw $8, %cx
-; X64-NEXT: movzwl %ax, %eax
-; X64-NEXT: movzwl %cx, %ecx
-; X64-NEXT: cmpq %rcx, %rax
+; X64-NEXT: cmpw %cx, %ax
; X64-NEXT: jne .LBB4_1
; X64-NEXT: # BB#2: # %loadbb1
; X64-NEXT: movzbl 2(%rdi), %eax
@@ -306,7 +302,7 @@ define i32 @length5(i8* %X, i8* %Y) noun
; X64-NEXT: movl (%rsi), %ecx
; X64-NEXT: bswapl %eax
; X64-NEXT: bswapl %ecx
-; X64-NEXT: cmpq %rcx, %rax
+; X64-NEXT: cmpl %ecx, %eax
; X64-NEXT: jne .LBB9_1
; X64-NEXT: # BB#2: # %loadbb1
; X64-NEXT: movzbl 4(%rdi), %eax
Modified: llvm/trunk/test/CodeGen/X86/memcmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/memcmp.ll?rev=309711&r1=309710&r2=309711&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/memcmp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/memcmp.ll Tue Aug 1 10:24:54 2017
@@ -117,9 +117,7 @@ define i32 @length3(i8* %X, i8* %Y) noun
; X86-NEXT: movzwl (%ecx), %esi
; X86-NEXT: rolw $8, %dx
; X86-NEXT: rolw $8, %si
-; X86-NEXT: movzwl %dx, %edx
-; X86-NEXT: movzwl %si, %esi
-; X86-NEXT: cmpl %esi, %edx
+; X86-NEXT: cmpw %si, %dx
; X86-NEXT: jne .LBB4_1
; X86-NEXT: # BB#2: # %loadbb1
; X86-NEXT: movzbl 2(%eax), %eax
@@ -140,9 +138,7 @@ define i32 @length3(i8* %X, i8* %Y) noun
; X64-NEXT: movzwl (%rsi), %ecx
; X64-NEXT: rolw $8, %ax
; X64-NEXT: rolw $8, %cx
-; X64-NEXT: movzwl %ax, %eax
-; X64-NEXT: movzwl %cx, %ecx
-; X64-NEXT: cmpq %rcx, %rax
+; X64-NEXT: cmpw %cx, %ax
; X64-NEXT: jne .LBB4_1
; X64-NEXT: # BB#2: # %loadbb1
; X64-NEXT: movzbl 2(%rdi), %eax
@@ -299,7 +295,7 @@ define i32 @length5(i8* %X, i8* %Y) noun
; X64-NEXT: movl (%rsi), %ecx
; X64-NEXT: bswapl %eax
; X64-NEXT: bswapl %ecx
-; X64-NEXT: cmpq %rcx, %rax
+; X64-NEXT: cmpl %ecx, %eax
; X64-NEXT: jne .LBB9_1
; X64-NEXT: # BB#2: # %loadbb1
; X64-NEXT: movzbl 4(%rdi), %eax
Modified: llvm/trunk/test/Transforms/CodeGenPrepare/X86/memcmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/X86/memcmp.ll?rev=309711&r1=309710&r2=309711&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/X86/memcmp.ll (original)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/X86/memcmp.ll Tue Aug 1 10:24:54 2017
@@ -22,63 +22,32 @@ define i32 @cmp2(i8* nocapture readonly
}
define i32 @cmp3(i8* nocapture readonly %x, i8* nocapture readonly %y) {
-; X32-LABEL: @cmp3(
-; X32-NEXT: loadbb:
-; X32-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i16*
-; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i16*
-; X32-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP0]]
-; X32-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]]
-; X32-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
-; X32-NEXT: [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
-; X32-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
-; X32-NEXT: [[TMP7:%.*]] = zext i16 [[TMP5]] to i32
-; X32-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP6]], [[TMP7]]
-; X32-NEXT: br i1 [[TMP8]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
-; X32: res_block:
-; X32-NEXT: [[TMP9:%.*]] = icmp ult i32 [[TMP6]], [[TMP7]]
-; X32-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 -1, i32 1
-; X32-NEXT: br label [[ENDBLOCK:%.*]]
-; X32: loadbb1:
-; X32-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[X]], i8 2
-; X32-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[Y]], i8 2
-; X32-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP11]]
-; X32-NEXT: [[TMP14:%.*]] = load i8, i8* [[TMP12]]
-; X32-NEXT: [[TMP15:%.*]] = zext i8 [[TMP13]] to i32
-; X32-NEXT: [[TMP16:%.*]] = zext i8 [[TMP14]] to i32
-; X32-NEXT: [[TMP17:%.*]] = sub i32 [[TMP15]], [[TMP16]]
-; X32-NEXT: br label [[ENDBLOCK]]
-; X32: endblock:
-; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP17]], [[LOADBB1]] ], [ [[TMP10]], [[RES_BLOCK]] ]
-; X32-NEXT: ret i32 [[PHI_RES]]
-;
-; X64-LABEL: @cmp3(
-; X64-NEXT: loadbb:
-; X64-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i16*
-; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i16*
-; X64-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP0]]
-; X64-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]]
-; X64-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
-; X64-NEXT: [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
-; X64-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i64
-; X64-NEXT: [[TMP7:%.*]] = zext i16 [[TMP5]] to i64
-; X64-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP6]], [[TMP7]]
-; X64-NEXT: br i1 [[TMP8]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
-; X64: res_block:
-; X64-NEXT: [[TMP9:%.*]] = icmp ult i64 [[TMP6]], [[TMP7]]
-; X64-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 -1, i32 1
-; X64-NEXT: br label [[ENDBLOCK:%.*]]
-; X64: loadbb1:
-; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[X]], i8 2
-; X64-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[Y]], i8 2
-; X64-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP11]]
-; X64-NEXT: [[TMP14:%.*]] = load i8, i8* [[TMP12]]
-; X64-NEXT: [[TMP15:%.*]] = zext i8 [[TMP13]] to i32
-; X64-NEXT: [[TMP16:%.*]] = zext i8 [[TMP14]] to i32
-; X64-NEXT: [[TMP17:%.*]] = sub i32 [[TMP15]], [[TMP16]]
-; X64-NEXT: br label [[ENDBLOCK]]
-; X64: endblock:
-; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP17]], [[LOADBB1]] ], [ [[TMP10]], [[RES_BLOCK]] ]
-; X64-NEXT: ret i32 [[PHI_RES]]
+; ALL-LABEL: @cmp3(
+; ALL-NEXT: loadbb:
+; ALL-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i16*
+; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i16*
+; ALL-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP0]]
+; ALL-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]]
+; ALL-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; ALL-NEXT: [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
+; ALL-NEXT: [[TMP6:%.*]] = icmp eq i16 [[TMP4]], [[TMP5]]
+; ALL-NEXT: br i1 [[TMP6]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; ALL: res_block:
+; ALL-NEXT: [[TMP7:%.*]] = icmp ult i16 [[TMP4]], [[TMP5]]
+; ALL-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1
+; ALL-NEXT: br label [[ENDBLOCK:%.*]]
+; ALL: loadbb1:
+; ALL-NEXT: [[TMP9:%.*]] = getelementptr i8, i8* [[X]], i8 2
+; ALL-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[Y]], i8 2
+; ALL-NEXT: [[TMP11:%.*]] = load i8, i8* [[TMP9]]
+; ALL-NEXT: [[TMP12:%.*]] = load i8, i8* [[TMP10]]
+; ALL-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; ALL-NEXT: [[TMP14:%.*]] = zext i8 [[TMP12]] to i32
+; ALL-NEXT: [[TMP15:%.*]] = sub i32 [[TMP13]], [[TMP14]]
+; ALL-NEXT: br label [[ENDBLOCK]]
+; ALL: endblock:
+; ALL-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP15]], [[LOADBB1]] ], [ [[TMP8]], [[RES_BLOCK]] ]
+; ALL-NEXT: ret i32 [[PHI_RES]]
;
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 3)
ret i32 %call
@@ -104,134 +73,70 @@ define i32 @cmp4(i8* nocapture readonly
}
define i32 @cmp5(i8* nocapture readonly %x, i8* nocapture readonly %y) {
-; X32-LABEL: @cmp5(
-; X32-NEXT: loadbb:
-; X32-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32*
-; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]]
-; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; X32-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
-; X32-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
-; X32-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP4]], [[TMP5]]
-; X32-NEXT: br i1 [[TMP6]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
-; X32: res_block:
-; X32-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP4]], [[TMP5]]
-; X32-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1
-; X32-NEXT: br label [[ENDBLOCK:%.*]]
-; X32: loadbb1:
-; X32-NEXT: [[TMP9:%.*]] = getelementptr i8, i8* [[X]], i8 4
-; X32-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[Y]], i8 4
-; X32-NEXT: [[TMP11:%.*]] = load i8, i8* [[TMP9]]
-; X32-NEXT: [[TMP12:%.*]] = load i8, i8* [[TMP10]]
-; X32-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
-; X32-NEXT: [[TMP14:%.*]] = zext i8 [[TMP12]] to i32
-; X32-NEXT: [[TMP15:%.*]] = sub i32 [[TMP13]], [[TMP14]]
-; X32-NEXT: br label [[ENDBLOCK]]
-; X32: endblock:
-; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP15]], [[LOADBB1]] ], [ [[TMP8]], [[RES_BLOCK]] ]
-; X32-NEXT: ret i32 [[PHI_RES]]
-;
-; X64-LABEL: @cmp5(
-; X64-NEXT: loadbb:
-; X64-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32*
-; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]]
-; X64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; X64-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
-; X64-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
-; X64-NEXT: [[TMP6:%.*]] = zext i32 [[TMP4]] to i64
-; X64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP5]] to i64
-; X64-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP6]], [[TMP7]]
-; X64-NEXT: br i1 [[TMP8]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
-; X64: res_block:
-; X64-NEXT: [[TMP9:%.*]] = icmp ult i64 [[TMP6]], [[TMP7]]
-; X64-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 -1, i32 1
-; X64-NEXT: br label [[ENDBLOCK:%.*]]
-; X64: loadbb1:
-; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[X]], i8 4
-; X64-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[Y]], i8 4
-; X64-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP11]]
-; X64-NEXT: [[TMP14:%.*]] = load i8, i8* [[TMP12]]
-; X64-NEXT: [[TMP15:%.*]] = zext i8 [[TMP13]] to i32
-; X64-NEXT: [[TMP16:%.*]] = zext i8 [[TMP14]] to i32
-; X64-NEXT: [[TMP17:%.*]] = sub i32 [[TMP15]], [[TMP16]]
-; X64-NEXT: br label [[ENDBLOCK]]
-; X64: endblock:
-; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP17]], [[LOADBB1]] ], [ [[TMP10]], [[RES_BLOCK]] ]
-; X64-NEXT: ret i32 [[PHI_RES]]
+; ALL-LABEL: @cmp5(
+; ALL-NEXT: loadbb:
+; ALL-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32*
+; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32*
+; ALL-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]]
+; ALL-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
+; ALL-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; ALL-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; ALL-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP4]], [[TMP5]]
+; ALL-NEXT: br i1 [[TMP6]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; ALL: res_block:
+; ALL-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP4]], [[TMP5]]
+; ALL-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1
+; ALL-NEXT: br label [[ENDBLOCK:%.*]]
+; ALL: loadbb1:
+; ALL-NEXT: [[TMP9:%.*]] = getelementptr i8, i8* [[X]], i8 4
+; ALL-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[Y]], i8 4
+; ALL-NEXT: [[TMP11:%.*]] = load i8, i8* [[TMP9]]
+; ALL-NEXT: [[TMP12:%.*]] = load i8, i8* [[TMP10]]
+; ALL-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; ALL-NEXT: [[TMP14:%.*]] = zext i8 [[TMP12]] to i32
+; ALL-NEXT: [[TMP15:%.*]] = sub i32 [[TMP13]], [[TMP14]]
+; ALL-NEXT: br label [[ENDBLOCK]]
+; ALL: endblock:
+; ALL-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP15]], [[LOADBB1]] ], [ [[TMP8]], [[RES_BLOCK]] ]
+; ALL-NEXT: ret i32 [[PHI_RES]]
;
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 5)
ret i32 %call
}
define i32 @cmp6(i8* nocapture readonly %x, i8* nocapture readonly %y) {
-; X32-LABEL: @cmp6(
-; X32-NEXT: loadbb:
-; X32-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32*
-; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]]
-; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; X32-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
-; X32-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
-; X32-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP4]], [[TMP5]]
-; X32-NEXT: br i1 [[TMP6]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
-; X32: res_block:
-; X32-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP4]], [[LOADBB:%.*]] ], [ [[TMP17:%.*]], [[LOADBB1]] ]
-; X32-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP5]], [[LOADBB]] ], [ [[TMP18:%.*]], [[LOADBB1]] ]
-; X32-NEXT: [[TMP7:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
-; X32-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1
-; X32-NEXT: br label [[ENDBLOCK:%.*]]
-; X32: loadbb1:
-; X32-NEXT: [[TMP9:%.*]] = bitcast i8* [[X]] to i16*
-; X32-NEXT: [[TMP10:%.*]] = bitcast i8* [[Y]] to i16*
-; X32-NEXT: [[TMP11:%.*]] = getelementptr i16, i16* [[TMP9]], i16 2
-; X32-NEXT: [[TMP12:%.*]] = getelementptr i16, i16* [[TMP10]], i16 2
-; X32-NEXT: [[TMP13:%.*]] = load i16, i16* [[TMP11]]
-; X32-NEXT: [[TMP14:%.*]] = load i16, i16* [[TMP12]]
-; X32-NEXT: [[TMP15:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP13]])
-; X32-NEXT: [[TMP16:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP14]])
-; X32-NEXT: [[TMP17]] = zext i16 [[TMP15]] to i32
-; X32-NEXT: [[TMP18]] = zext i16 [[TMP16]] to i32
-; X32-NEXT: [[TMP19:%.*]] = icmp eq i32 [[TMP17]], [[TMP18]]
-; X32-NEXT: br i1 [[TMP19]], label [[ENDBLOCK]], label [[RES_BLOCK]]
-; X32: endblock:
-; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP8]], [[RES_BLOCK]] ]
-; X32-NEXT: ret i32 [[PHI_RES]]
-;
-; X64-LABEL: @cmp6(
-; X64-NEXT: loadbb:
-; X64-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32*
-; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]]
-; X64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; X64-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
-; X64-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
-; X64-NEXT: [[TMP6:%.*]] = zext i32 [[TMP4]] to i64
-; X64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP5]] to i64
-; X64-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP6]], [[TMP7]]
-; X64-NEXT: br i1 [[TMP8]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
-; X64: res_block:
-; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP6]], [[LOADBB:%.*]] ], [ [[TMP19:%.*]], [[LOADBB1]] ]
-; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP7]], [[LOADBB]] ], [ [[TMP20:%.*]], [[LOADBB1]] ]
-; X64-NEXT: [[TMP9:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
-; X64-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 -1, i32 1
-; X64-NEXT: br label [[ENDBLOCK:%.*]]
-; X64: loadbb1:
-; X64-NEXT: [[TMP11:%.*]] = bitcast i8* [[X]] to i16*
-; X64-NEXT: [[TMP12:%.*]] = bitcast i8* [[Y]] to i16*
-; X64-NEXT: [[TMP13:%.*]] = getelementptr i16, i16* [[TMP11]], i16 2
-; X64-NEXT: [[TMP14:%.*]] = getelementptr i16, i16* [[TMP12]], i16 2
-; X64-NEXT: [[TMP15:%.*]] = load i16, i16* [[TMP13]]
-; X64-NEXT: [[TMP16:%.*]] = load i16, i16* [[TMP14]]
-; X64-NEXT: [[TMP17:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP15]])
-; X64-NEXT: [[TMP18:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP16]])
-; X64-NEXT: [[TMP19]] = zext i16 [[TMP17]] to i64
-; X64-NEXT: [[TMP20]] = zext i16 [[TMP18]] to i64
-; X64-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
-; X64-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
-; X64: endblock:
-; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP10]], [[RES_BLOCK]] ]
-; X64-NEXT: ret i32 [[PHI_RES]]
+; ALL-LABEL: @cmp6(
+; ALL-NEXT: loadbb:
+; ALL-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32*
+; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32*
+; ALL-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]]
+; ALL-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
+; ALL-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; ALL-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; ALL-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP4]], [[TMP5]]
+; ALL-NEXT: br i1 [[TMP6]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; ALL: res_block:
+; ALL-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP4]], [[LOADBB:%.*]] ], [ [[TMP17:%.*]], [[LOADBB1]] ]
+; ALL-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP5]], [[LOADBB]] ], [ [[TMP18:%.*]], [[LOADBB1]] ]
+; ALL-NEXT: [[TMP7:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; ALL-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1
+; ALL-NEXT: br label [[ENDBLOCK:%.*]]
+; ALL: loadbb1:
+; ALL-NEXT: [[TMP9:%.*]] = bitcast i8* [[X]] to i16*
+; ALL-NEXT: [[TMP10:%.*]] = bitcast i8* [[Y]] to i16*
+; ALL-NEXT: [[TMP11:%.*]] = getelementptr i16, i16* [[TMP9]], i16 2
+; ALL-NEXT: [[TMP12:%.*]] = getelementptr i16, i16* [[TMP10]], i16 2
+; ALL-NEXT: [[TMP13:%.*]] = load i16, i16* [[TMP11]]
+; ALL-NEXT: [[TMP14:%.*]] = load i16, i16* [[TMP12]]
+; ALL-NEXT: [[TMP15:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP13]])
+; ALL-NEXT: [[TMP16:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP14]])
+; ALL-NEXT: [[TMP17]] = zext i16 [[TMP15]] to i32
+; ALL-NEXT: [[TMP18]] = zext i16 [[TMP16]] to i32
+; ALL-NEXT: [[TMP19:%.*]] = icmp eq i32 [[TMP17]], [[TMP18]]
+; ALL-NEXT: br i1 [[TMP19]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; ALL: endblock:
+; ALL-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP8]], [[RES_BLOCK]] ]
+; ALL-NEXT: ret i32 [[PHI_RES]]
;
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 6)
ret i32 %call
More information about the llvm-commits
mailing list