[llvm] [ExpandMemCmp] Optimize ExpandMemCmp to reduce instruction count on x86 (PR #69609)
Igor Kirillov via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 19 08:44:30 PDT 2023
https://github.com/igogo-x86 created https://github.com/llvm/llvm-project/pull/69609
Refactored the sequence of operations in MemCmpExpansion to zero-extend before byte-swapping. This change enables the generation of fewer instructions for x86, thereby improving code efficiency.
From 67917cd9a679e7d0247b790d01cf8d13acef12bd Mon Sep 17 00:00:00 2001
From: Igor Kirillov <igor.kirillov at arm.com>
Date: Thu, 19 Oct 2023 15:12:45 +0000
Subject: [PATCH] [ExpandMemCmp] Optimize ExpandMemCmp to reduce instruction
count on x86
Refactored the sequence of operations in MemCmpExpansion to zero-extend
before byte-swapping. This change enables the generation of fewer instructions
for x86, thereby improving code efficiency.
---
llvm/lib/CodeGen/ExpandMemCmp.cpp | 31 +-
.../CodeGen/X86/memcmp-more-load-pairs-x32.ll | 26 +-
.../CodeGen/X86/memcmp-more-load-pairs.ll | 22 +-
llvm/test/CodeGen/X86/memcmp-optsize-x32.ll | 10 +-
llvm/test/CodeGen/X86/memcmp-optsize.ll | 10 +-
llvm/test/CodeGen/X86/memcmp-pgso-x32.ll | 10 +-
llvm/test/CodeGen/X86/memcmp-pgso.ll | 10 +-
llvm/test/CodeGen/X86/memcmp-x32.ll | 36 +-
llvm/test/CodeGen/X86/memcmp.ll | 32 +-
.../Transforms/ExpandMemCmp/X86/memcmp-x32.ll | 392 +++---
.../Transforms/ExpandMemCmp/X86/memcmp.ll | 1058 ++++++++---------
11 files changed, 801 insertions(+), 836 deletions(-)
diff --git a/llvm/lib/CodeGen/ExpandMemCmp.cpp b/llvm/lib/CodeGen/ExpandMemCmp.cpp
index 911ebd41afc5b91..40fbe877ab7ac09 100644
--- a/llvm/lib/CodeGen/ExpandMemCmp.cpp
+++ b/llvm/lib/CodeGen/ExpandMemCmp.cpp
@@ -307,19 +307,20 @@ MemCmpExpansion::LoadPair MemCmpExpansion::getLoadPair(Type *LoadSizeType,
if (!Rhs)
Rhs = Builder.CreateAlignedLoad(LoadSizeType, RhsSource, RhsAlign);
+ // Zero extend if required.
+ if (CmpSizeType != nullptr && CmpSizeType != LoadSizeType) {
+ Lhs = Builder.CreateZExt(Lhs, CmpSizeType);
+ Rhs = Builder.CreateZExt(Rhs, CmpSizeType);
+ }
+
// Swap bytes if required.
if (NeedsBSwap) {
+ Type *BSwapType = CmpSizeType ? CmpSizeType : LoadSizeType;
Function *Bswap = Intrinsic::getDeclaration(CI->getModule(),
- Intrinsic::bswap, LoadSizeType);
+ Intrinsic::bswap, BSwapType);
Lhs = Builder.CreateCall(Bswap, Lhs);
Rhs = Builder.CreateCall(Bswap, Rhs);
}
-
- // Zero extend if required.
- if (CmpSizeType != nullptr && CmpSizeType != LoadSizeType) {
- Lhs = Builder.CreateZExt(Lhs, CmpSizeType);
- Rhs = Builder.CreateZExt(Rhs, CmpSizeType);
- }
return {Lhs, Rhs};
}
@@ -694,10 +695,10 @@ Value *MemCmpExpansion::getMemCmpExpansion() {
/// %17 = getelementptr i32, i32* %15, i32 2
/// %18 = load i32, i32* %16
/// %19 = load i32, i32* %17
-/// %20 = call i32 @llvm.bswap.i32(i32 %18)
-/// %21 = call i32 @llvm.bswap.i32(i32 %19)
-/// %22 = zext i32 %20 to i64
-/// %23 = zext i32 %21 to i64
+/// %20 = zext i32 %18 to i64
+/// %21 = zext i32 %19 to i64
+/// %22 = call i64 @llvm.bswap.i64(i64 %20)
+/// %23 = call i64 @llvm.bswap.i64(i64 %21)
/// %24 = sub i64 %22, %23
/// %25 = icmp ne i64 %24, 0
/// br i1 %25, label %res_block, label %loadbb2
@@ -710,10 +711,10 @@ Value *MemCmpExpansion::getMemCmpExpansion() {
/// %31 = getelementptr i16, i16* %29, i16 6
/// %32 = load i16, i16* %30
/// %33 = load i16, i16* %31
-/// %34 = call i16 @llvm.bswap.i16(i16 %32)
-/// %35 = call i16 @llvm.bswap.i16(i16 %33)
-/// %36 = zext i16 %34 to i64
-/// %37 = zext i16 %35 to i64
+/// %34 = zext i16 %32 to i64
+/// %35 = zext i16 %33 to i64
+/// %36 = call i64 @llvm.bswap.i64(i64 %34)
+/// %37 = call i64 @llvm.bswap.i64(i64 %35)
/// %38 = sub i64 %36, %37
/// %39 = icmp ne i64 %38, 0
/// br i1 %39, label %res_block, label %loadbb3
diff --git a/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll b/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll
index c0f8f86e6e8b107..a89571656e46951 100644
--- a/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll
+++ b/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll
@@ -44,14 +44,12 @@ define i1 @length0_lt(ptr %X, ptr %Y) nounwind {
define i32 @length2(ptr %X, ptr %Y) nounwind {
; X86-LABEL: length2:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movzwl (%eax), %eax
; X86-NEXT: movzwl (%ecx), %ecx
-; X86-NEXT: movzwl (%eax), %edx
-; X86-NEXT: rolw $8, %cx
-; X86-NEXT: rolw $8, %dx
-; X86-NEXT: movzwl %cx, %eax
-; X86-NEXT: movzwl %dx, %ecx
+; X86-NEXT: bswapl %eax
+; X86-NEXT: bswapl %ecx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: retl
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind
@@ -75,14 +73,12 @@ define i1 @length2_eq(ptr %X, ptr %Y) nounwind {
define i1 @length2_lt(ptr %X, ptr %Y) nounwind {
; X86-LABEL: length2_lt:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movzwl (%eax), %eax
; X86-NEXT: movzwl (%ecx), %ecx
-; X86-NEXT: movzwl (%eax), %edx
-; X86-NEXT: rolw $8, %cx
-; X86-NEXT: rolw $8, %dx
-; X86-NEXT: movzwl %cx, %eax
-; X86-NEXT: movzwl %dx, %ecx
+; X86-NEXT: bswapl %eax
+; X86-NEXT: bswapl %ecx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: shrl $31, %eax
; X86-NEXT: # kill: def $al killed $al killed $eax
@@ -99,10 +95,8 @@ define i1 @length2_gt(ptr %X, ptr %Y) nounwind {
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzwl (%ecx), %ecx
; X86-NEXT: movzwl (%eax), %eax
-; X86-NEXT: rolw $8, %cx
-; X86-NEXT: rolw $8, %ax
-; X86-NEXT: movzwl %cx, %ecx
-; X86-NEXT: movzwl %ax, %eax
+; X86-NEXT: bswapl %ecx
+; X86-NEXT: bswapl %eax
; X86-NEXT: subl %eax, %ecx
; X86-NEXT: testl %ecx, %ecx
; X86-NEXT: setg %al
diff --git a/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll b/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll
index 56d06021867fa15..1f07ba39ecef9fe 100644
--- a/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll
+++ b/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll
@@ -52,10 +52,8 @@ define i32 @length2(ptr %X, ptr %Y) nounwind {
; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzwl (%rsi), %ecx
-; X64-NEXT: rolw $8, %ax
-; X64-NEXT: rolw $8, %cx
-; X64-NEXT: movzwl %ax, %eax
-; X64-NEXT: movzwl %cx, %ecx
+; X64-NEXT: bswapl %eax
+; X64-NEXT: bswapl %ecx
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind
@@ -79,10 +77,8 @@ define i1 @length2_lt(ptr %X, ptr %Y) nounwind {
; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzwl (%rsi), %ecx
-; X64-NEXT: rolw $8, %ax
-; X64-NEXT: rolw $8, %cx
-; X64-NEXT: movzwl %ax, %eax
-; X64-NEXT: movzwl %cx, %ecx
+; X64-NEXT: bswapl %eax
+; X64-NEXT: bswapl %ecx
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: shrl $31, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
@@ -97,10 +93,8 @@ define i1 @length2_gt(ptr %X, ptr %Y) nounwind {
; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzwl (%rsi), %ecx
-; X64-NEXT: rolw $8, %ax
-; X64-NEXT: rolw $8, %cx
-; X64-NEXT: movzwl %ax, %eax
-; X64-NEXT: movzwl %cx, %ecx
+; X64-NEXT: bswapl %eax
+; X64-NEXT: bswapl %ecx
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: testl %eax, %eax
; X64-NEXT: setg %al
@@ -511,8 +505,8 @@ define i32 @length12(ptr %X, ptr %Y) nounwind {
; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: movl 8(%rdi), %ecx
; X64-NEXT: movl 8(%rsi), %edx
-; X64-NEXT: bswapl %ecx
-; X64-NEXT: bswapl %edx
+; X64-NEXT: bswapq %rcx
+; X64-NEXT: bswapq %rdx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: je .LBB29_3
diff --git a/llvm/test/CodeGen/X86/memcmp-optsize-x32.ll b/llvm/test/CodeGen/X86/memcmp-optsize-x32.ll
index 762691151f4bd3b..8efd4fca91a9972 100644
--- a/llvm/test/CodeGen/X86/memcmp-optsize-x32.ll
+++ b/llvm/test/CodeGen/X86/memcmp-optsize-x32.ll
@@ -13,14 +13,12 @@ declare dso_local i32 @bcmp(ptr, ptr, i32)
define i32 @length2(ptr %X, ptr %Y) nounwind optsize {
; X86-LABEL: length2:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movzwl (%eax), %eax
; X86-NEXT: movzwl (%ecx), %ecx
-; X86-NEXT: movzwl (%eax), %edx
-; X86-NEXT: rolw $8, %cx
-; X86-NEXT: rolw $8, %dx
-; X86-NEXT: movzwl %cx, %eax
-; X86-NEXT: movzwl %dx, %ecx
+; X86-NEXT: bswapl %eax
+; X86-NEXT: bswapl %ecx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: retl
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind
diff --git a/llvm/test/CodeGen/X86/memcmp-optsize.ll b/llvm/test/CodeGen/X86/memcmp-optsize.ll
index c0c7b98d471cd46..a8df0ac1354f893 100644
--- a/llvm/test/CodeGen/X86/memcmp-optsize.ll
+++ b/llvm/test/CodeGen/X86/memcmp-optsize.ll
@@ -16,10 +16,8 @@ define i32 @length2(ptr %X, ptr %Y) nounwind optsize {
; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzwl (%rsi), %ecx
-; X64-NEXT: rolw $8, %ax
-; X64-NEXT: rolw $8, %cx
-; X64-NEXT: movzwl %ax, %eax
-; X64-NEXT: movzwl %cx, %ecx
+; X64-NEXT: bswapl %eax
+; X64-NEXT: bswapl %ecx
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind
@@ -251,8 +249,8 @@ define i32 @length12(ptr %X, ptr %Y) nounwind optsize {
; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: movl 8(%rdi), %ecx
; X64-NEXT: movl 8(%rsi), %edx
-; X64-NEXT: bswapl %ecx
-; X64-NEXT: bswapl %edx
+; X64-NEXT: bswapq %rcx
+; X64-NEXT: bswapq %rdx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: je .LBB15_3
diff --git a/llvm/test/CodeGen/X86/memcmp-pgso-x32.ll b/llvm/test/CodeGen/X86/memcmp-pgso-x32.ll
index cb45fd3ebb9068c..b486eebd54b4a37 100644
--- a/llvm/test/CodeGen/X86/memcmp-pgso-x32.ll
+++ b/llvm/test/CodeGen/X86/memcmp-pgso-x32.ll
@@ -13,14 +13,12 @@ declare dso_local i32 @bcmp(ptr, ptr, i32)
define i32 @length2(ptr %X, ptr %Y) nounwind !prof !14 {
; X86-LABEL: length2:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movzwl (%eax), %eax
; X86-NEXT: movzwl (%ecx), %ecx
-; X86-NEXT: movzwl (%eax), %edx
-; X86-NEXT: rolw $8, %cx
-; X86-NEXT: rolw $8, %dx
-; X86-NEXT: movzwl %cx, %eax
-; X86-NEXT: movzwl %dx, %ecx
+; X86-NEXT: bswapl %eax
+; X86-NEXT: bswapl %ecx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: retl
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind
diff --git a/llvm/test/CodeGen/X86/memcmp-pgso.ll b/llvm/test/CodeGen/X86/memcmp-pgso.ll
index 720344a22e43b5c..afb57c8101b8221 100644
--- a/llvm/test/CodeGen/X86/memcmp-pgso.ll
+++ b/llvm/test/CodeGen/X86/memcmp-pgso.ll
@@ -16,10 +16,8 @@ define i32 @length2(ptr %X, ptr %Y) nounwind !prof !14 {
; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzwl (%rsi), %ecx
-; X64-NEXT: rolw $8, %ax
-; X64-NEXT: rolw $8, %cx
-; X64-NEXT: movzwl %ax, %eax
-; X64-NEXT: movzwl %cx, %ecx
+; X64-NEXT: bswapl %eax
+; X64-NEXT: bswapl %ecx
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind
@@ -251,8 +249,8 @@ define i32 @length12(ptr %X, ptr %Y) nounwind !prof !14 {
; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: movl 8(%rdi), %ecx
; X64-NEXT: movl 8(%rsi), %edx
-; X64-NEXT: bswapl %ecx
-; X64-NEXT: bswapl %edx
+; X64-NEXT: bswapq %rcx
+; X64-NEXT: bswapq %rdx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: je .LBB15_3
diff --git a/llvm/test/CodeGen/X86/memcmp-x32.ll b/llvm/test/CodeGen/X86/memcmp-x32.ll
index ab439b32f2f1b20..f5b67ab45f7255c 100644
--- a/llvm/test/CodeGen/X86/memcmp-x32.ll
+++ b/llvm/test/CodeGen/X86/memcmp-x32.ll
@@ -43,14 +43,12 @@ define i1 @length0_lt(ptr %X, ptr %Y) nounwind {
define i32 @length2(ptr %X, ptr %Y) nounwind {
; X86-LABEL: length2:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movzwl (%eax), %eax
; X86-NEXT: movzwl (%ecx), %ecx
-; X86-NEXT: movzwl (%eax), %edx
-; X86-NEXT: rolw $8, %cx
-; X86-NEXT: rolw $8, %dx
-; X86-NEXT: movzwl %cx, %eax
-; X86-NEXT: movzwl %dx, %ecx
+; X86-NEXT: bswapl %eax
+; X86-NEXT: bswapl %ecx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: retl
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind
@@ -62,9 +60,8 @@ define i32 @length2_const(ptr %X, ptr %Y) nounwind {
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl (%eax), %eax
-; X86-NEXT: rolw $8, %ax
-; X86-NEXT: movzwl %ax, %eax
-; X86-NEXT: addl $-12594, %eax # imm = 0xCECE
+; X86-NEXT: bswapl %eax
+; X86-NEXT: addl $-825360384, %eax # imm = 0xCECE0000
; X86-NEXT: retl
%m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i32 2) nounwind
ret i32 %m
@@ -75,9 +72,8 @@ define i1 @length2_gt_const(ptr %X, ptr %Y) nounwind {
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl (%eax), %eax
-; X86-NEXT: rolw $8, %ax
-; X86-NEXT: movzwl %ax, %eax
-; X86-NEXT: addl $-12594, %eax # imm = 0xCECE
+; X86-NEXT: bswapl %eax
+; X86-NEXT: addl $-825360384, %eax # imm = 0xCECE0000
; X86-NEXT: testl %eax, %eax
; X86-NEXT: setg %al
; X86-NEXT: retl
@@ -103,14 +99,12 @@ define i1 @length2_eq(ptr %X, ptr %Y) nounwind {
define i1 @length2_lt(ptr %X, ptr %Y) nounwind {
; X86-LABEL: length2_lt:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movzwl (%eax), %eax
; X86-NEXT: movzwl (%ecx), %ecx
-; X86-NEXT: movzwl (%eax), %edx
-; X86-NEXT: rolw $8, %cx
-; X86-NEXT: rolw $8, %dx
-; X86-NEXT: movzwl %cx, %eax
-; X86-NEXT: movzwl %dx, %ecx
+; X86-NEXT: bswapl %eax
+; X86-NEXT: bswapl %ecx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: shrl $31, %eax
; X86-NEXT: # kill: def $al killed $al killed $eax
@@ -127,10 +121,8 @@ define i1 @length2_gt(ptr %X, ptr %Y) nounwind {
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzwl (%ecx), %ecx
; X86-NEXT: movzwl (%eax), %eax
-; X86-NEXT: rolw $8, %cx
-; X86-NEXT: rolw $8, %ax
-; X86-NEXT: movzwl %cx, %ecx
-; X86-NEXT: movzwl %ax, %eax
+; X86-NEXT: bswapl %ecx
+; X86-NEXT: bswapl %eax
; X86-NEXT: subl %eax, %ecx
; X86-NEXT: testl %ecx, %ecx
; X86-NEXT: setg %al
diff --git a/llvm/test/CodeGen/X86/memcmp.ll b/llvm/test/CodeGen/X86/memcmp.ll
index 1330f3a241a5c2a..b8c0f509f1d081b 100644
--- a/llvm/test/CodeGen/X86/memcmp.ll
+++ b/llvm/test/CodeGen/X86/memcmp.ll
@@ -51,10 +51,8 @@ define i32 @length2(ptr %X, ptr %Y) nounwind {
; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzwl (%rsi), %ecx
-; X64-NEXT: rolw $8, %ax
-; X64-NEXT: rolw $8, %cx
-; X64-NEXT: movzwl %ax, %eax
-; X64-NEXT: movzwl %cx, %ecx
+; X64-NEXT: bswapl %eax
+; X64-NEXT: bswapl %ecx
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind
@@ -65,9 +63,8 @@ define i32 @length2_const(ptr %X, ptr %Y) nounwind {
; X64-LABEL: length2_const:
; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax
-; X64-NEXT: rolw $8, %ax
-; X64-NEXT: movzwl %ax, %eax
-; X64-NEXT: addl $-12594, %eax # imm = 0xCECE
+; X64-NEXT: bswapl %eax
+; X64-NEXT: addl $-825360384, %eax # imm = 0xCECE0000
; X64-NEXT: retq
%m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 2) nounwind
ret i32 %m
@@ -77,9 +74,8 @@ define i1 @length2_gt_const(ptr %X, ptr %Y) nounwind {
; X64-LABEL: length2_gt_const:
; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax
-; X64-NEXT: rolw $8, %ax
-; X64-NEXT: movzwl %ax, %eax
-; X64-NEXT: addl $-12594, %eax # imm = 0xCECE
+; X64-NEXT: bswapl %eax
+; X64-NEXT: addl $-825360384, %eax # imm = 0xCECE0000
; X64-NEXT: testl %eax, %eax
; X64-NEXT: setg %al
; X64-NEXT: retq
@@ -105,10 +101,8 @@ define i1 @length2_lt(ptr %X, ptr %Y) nounwind {
; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzwl (%rsi), %ecx
-; X64-NEXT: rolw $8, %ax
-; X64-NEXT: rolw $8, %cx
-; X64-NEXT: movzwl %ax, %eax
-; X64-NEXT: movzwl %cx, %ecx
+; X64-NEXT: bswapl %eax
+; X64-NEXT: bswapl %ecx
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: shrl $31, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
@@ -123,10 +117,8 @@ define i1 @length2_gt(ptr %X, ptr %Y) nounwind {
; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzwl (%rsi), %ecx
-; X64-NEXT: rolw $8, %ax
-; X64-NEXT: rolw $8, %cx
-; X64-NEXT: movzwl %ax, %eax
-; X64-NEXT: movzwl %cx, %ecx
+; X64-NEXT: bswapl %eax
+; X64-NEXT: bswapl %ecx
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: testl %eax, %eax
; X64-NEXT: setg %al
@@ -537,8 +529,8 @@ define i32 @length12(ptr %X, ptr %Y) nounwind {
; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: movl 8(%rdi), %ecx
; X64-NEXT: movl 8(%rsi), %edx
-; X64-NEXT: bswapl %ecx
-; X64-NEXT: bswapl %edx
+; X64-NEXT: bswapq %rcx
+; X64-NEXT: bswapq %rdx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: je .LBB31_3
diff --git a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-x32.ll b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-x32.ll
index f56d9688a01e12d..bd42d5f8d50859d 100644
--- a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-x32.ll
+++ b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-x32.ll
@@ -5,14 +5,14 @@ declare i32 @memcmp(ptr nocapture, ptr nocapture, i32)
define i32 @cmp2(ptr nocapture readonly %x, ptr nocapture readonly %y) {
; X32-LABEL: @cmp2(
-; X32-NEXT: [[TMP3:%.*]] = load i16, ptr [[X:%.*]], align 1
-; X32-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y:%.*]], align 1
-; X32-NEXT: [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
-; X32-NEXT: [[TMP6:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
-; X32-NEXT: [[TMP7:%.*]] = zext i16 [[TMP5]] to i32
-; X32-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i32
-; X32-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
-; X32-NEXT: ret i32 [[TMP9]]
+; X32-NEXT: [[TMP1:%.*]] = load i16, ptr [[X:%.*]], align 1
+; X32-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y:%.*]], align 1
+; X32-NEXT: [[TMP3:%.*]] = zext i16 [[TMP1]] to i32
+; X32-NEXT: [[TMP4:%.*]] = zext i16 [[TMP2]] to i32
+; X32-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X32-NEXT: [[TMP6:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X32-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X32-NEXT: ret i32 [[TMP7]]
;
%call = tail call i32 @memcmp(ptr %x, ptr %y, i32 2)
ret i32 %call
@@ -20,14 +20,14 @@ define i32 @cmp2(ptr nocapture readonly %x, ptr nocapture readonly %y) {
define i32 @cmp2_align2(ptr nocapture readonly align 2 %x, ptr nocapture readonly align 2 %y) {
; X32-LABEL: @cmp2_align2(
-; X32-NEXT: [[TMP3:%.*]] = load i16, ptr [[X:%.*]], align 2
-; X32-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y:%.*]], align 2
-; X32-NEXT: [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
-; X32-NEXT: [[TMP6:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
-; X32-NEXT: [[TMP7:%.*]] = zext i16 [[TMP5]] to i32
-; X32-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i32
-; X32-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
-; X32-NEXT: ret i32 [[TMP9]]
+; X32-NEXT: [[TMP1:%.*]] = load i16, ptr [[X:%.*]], align 2
+; X32-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y:%.*]], align 2
+; X32-NEXT: [[TMP3:%.*]] = zext i16 [[TMP1]] to i32
+; X32-NEXT: [[TMP4:%.*]] = zext i16 [[TMP2]] to i32
+; X32-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X32-NEXT: [[TMP6:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X32-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X32-NEXT: ret i32 [[TMP7]]
;
%call = tail call i32 @memcmp(ptr %x, ptr %y, i32 2)
ret i32 %call
@@ -37,27 +37,27 @@ define i32 @cmp3(ptr nocapture readonly %x, ptr nocapture readonly %y) {
; X32-LABEL: @cmp3(
; X32-NEXT: br label [[LOADBB:%.*]]
; X32: res_block:
-; X32-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP7:%.*]], [[TMP8:%.*]]
+; X32-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]]
; X32-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
; X32-NEXT: br label [[ENDBLOCK:%.*]]
; X32: loadbb:
-; X32-NEXT: [[TMP5:%.*]] = load i16, ptr [[X:%.*]], align 1
-; X32-NEXT: [[TMP6:%.*]] = load i16, ptr [[Y:%.*]], align 1
-; X32-NEXT: [[TMP7]] = call i16 @llvm.bswap.i16(i16 [[TMP5]])
-; X32-NEXT: [[TMP8]] = call i16 @llvm.bswap.i16(i16 [[TMP6]])
-; X32-NEXT: [[TMP9:%.*]] = icmp eq i16 [[TMP7]], [[TMP8]]
-; X32-NEXT: br i1 [[TMP9]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X32-NEXT: [[TMP3:%.*]] = load i16, ptr [[X:%.*]], align 1
+; X32-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y:%.*]], align 1
+; X32-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
+; X32-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
+; X32-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]]
+; X32-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
; X32: loadbb1:
-; X32-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 2
-; X32-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 2
-; X32-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP10]], align 1
-; X32-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP11]], align 1
-; X32-NEXT: [[TMP14:%.*]] = zext i8 [[TMP12]] to i32
-; X32-NEXT: [[TMP15:%.*]] = zext i8 [[TMP13]] to i32
-; X32-NEXT: [[TMP16:%.*]] = sub i32 [[TMP14]], [[TMP15]]
+; X32-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X32-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X32-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X32-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X32-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X32-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X32-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
; X32-NEXT: br label [[ENDBLOCK]]
; X32: endblock:
-; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP16]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
; X32-NEXT: ret i32 [[PHI_RES]]
;
%call = tail call i32 @memcmp(ptr %x, ptr %y, i32 3)
@@ -66,16 +66,16 @@ define i32 @cmp3(ptr nocapture readonly %x, ptr nocapture readonly %y) {
define i32 @cmp4(ptr nocapture readonly %x, ptr nocapture readonly %y) {
; X32-LABEL: @cmp4(
-; X32-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1
-; X32-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1
-; X32-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
-; X32-NEXT: [[TMP6:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
-; X32-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]]
-; X32-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP5]], [[TMP6]]
-; X32-NEXT: [[TMP9:%.*]] = zext i1 [[TMP7]] to i32
-; X32-NEXT: [[TMP10:%.*]] = zext i1 [[TMP8]] to i32
-; X32-NEXT: [[TMP11:%.*]] = sub i32 [[TMP9]], [[TMP10]]
-; X32-NEXT: ret i32 [[TMP11]]
+; X32-NEXT: [[TMP1:%.*]] = load i32, ptr [[X:%.*]], align 1
+; X32-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y:%.*]], align 1
+; X32-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X32-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X32-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X32-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X32-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X32-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X32-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X32-NEXT: ret i32 [[TMP9]]
;
%call = tail call i32 @memcmp(ptr %x, ptr %y, i32 4)
ret i32 %call
@@ -85,27 +85,27 @@ define i32 @cmp5(ptr nocapture readonly %x, ptr nocapture readonly %y) {
; X32-LABEL: @cmp5(
; X32-NEXT: br label [[LOADBB:%.*]]
; X32: res_block:
-; X32-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP7:%.*]], [[TMP8:%.*]]
+; X32-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
; X32-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
; X32-NEXT: br label [[ENDBLOCK:%.*]]
; X32: loadbb:
-; X32-NEXT: [[TMP5:%.*]] = load i32, ptr [[X:%.*]], align 1
-; X32-NEXT: [[TMP6:%.*]] = load i32, ptr [[Y:%.*]], align 1
-; X32-NEXT: [[TMP7]] = call i32 @llvm.bswap.i32(i32 [[TMP5]])
-; X32-NEXT: [[TMP8]] = call i32 @llvm.bswap.i32(i32 [[TMP6]])
-; X32-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]]
-; X32-NEXT: br i1 [[TMP9]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X32-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1
+; X32-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1
+; X32-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X32-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X32-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X32-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
; X32: loadbb1:
-; X32-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 4
-; X32-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 4
-; X32-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP10]], align 1
-; X32-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP11]], align 1
-; X32-NEXT: [[TMP14:%.*]] = zext i8 [[TMP12]] to i32
-; X32-NEXT: [[TMP15:%.*]] = zext i8 [[TMP13]] to i32
-; X32-NEXT: [[TMP16:%.*]] = sub i32 [[TMP14]], [[TMP15]]
+; X32-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X32-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X32-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X32-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X32-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X32-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X32-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
; X32-NEXT: br label [[ENDBLOCK]]
; X32: endblock:
-; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP16]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
; X32-NEXT: ret i32 [[PHI_RES]]
;
%call = tail call i32 @memcmp(ptr %x, ptr %y, i32 5)
@@ -116,29 +116,29 @@ define i32 @cmp6(ptr nocapture readonly %x, ptr nocapture readonly %y) {
; X32-LABEL: @cmp6(
; X32-NEXT: br label [[LOADBB:%.*]]
; X32: res_block:
-; X32-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP18:%.*]], [[LOADBB1:%.*]] ]
-; X32-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP8:%.*]], [[LOADBB]] ], [ [[TMP19:%.*]], [[LOADBB1]] ]
+; X32-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ]
+; X32-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ]
; X32-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
; X32-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
; X32-NEXT: br label [[ENDBLOCK:%.*]]
; X32: loadbb:
-; X32-NEXT: [[TMP5:%.*]] = load i32, ptr [[X:%.*]], align 1
-; X32-NEXT: [[TMP6:%.*]] = load i32, ptr [[Y:%.*]], align 1
-; X32-NEXT: [[TMP7]] = call i32 @llvm.bswap.i32(i32 [[TMP5]])
-; X32-NEXT: [[TMP8]] = call i32 @llvm.bswap.i32(i32 [[TMP6]])
-; X32-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]]
-; X32-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X32-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1
+; X32-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1
+; X32-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X32-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X32-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X32-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
; X32: loadbb1:
-; X32-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 4
-; X32-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 4
-; X32-NEXT: [[TMP14:%.*]] = load i16, ptr [[TMP10]], align 1
-; X32-NEXT: [[TMP15:%.*]] = load i16, ptr [[TMP11]], align 1
-; X32-NEXT: [[TMP16:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP14]])
-; X32-NEXT: [[TMP17:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP15]])
-; X32-NEXT: [[TMP18]] = zext i16 [[TMP16]] to i32
-; X32-NEXT: [[TMP19]] = zext i16 [[TMP17]] to i32
-; X32-NEXT: [[TMP20:%.*]] = icmp eq i32 [[TMP18]], [[TMP19]]
-; X32-NEXT: br i1 [[TMP20]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X32-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X32-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X32-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP8]], align 1
+; X32-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP9]], align 1
+; X32-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i32
+; X32-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i32
+; X32-NEXT: [[TMP14]] = call i32 @llvm.bswap.i32(i32 [[TMP12]])
+; X32-NEXT: [[TMP15]] = call i32 @llvm.bswap.i32(i32 [[TMP13]])
+; X32-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP14]], [[TMP15]]
+; X32-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]]
; X32: endblock:
; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
; X32-NEXT: ret i32 [[PHI_RES]]
@@ -151,27 +151,27 @@ define i32 @cmp7(ptr nocapture readonly %x, ptr nocapture readonly %y) {
; X32-LABEL: @cmp7(
; X32-NEXT: br label [[LOADBB:%.*]]
; X32: res_block:
-; X32-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP16:%.*]], [[LOADBB1:%.*]] ]
-; X32-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP8:%.*]], [[LOADBB]] ], [ [[TMP17:%.*]], [[LOADBB1]] ]
+; X32-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X32-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
; X32-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
; X32-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
; X32-NEXT: br label [[ENDBLOCK:%.*]]
; X32: loadbb:
-; X32-NEXT: [[TMP5:%.*]] = load i32, ptr [[X:%.*]], align 1
-; X32-NEXT: [[TMP6:%.*]] = load i32, ptr [[Y:%.*]], align 1
-; X32-NEXT: [[TMP7]] = call i32 @llvm.bswap.i32(i32 [[TMP5]])
-; X32-NEXT: [[TMP8]] = call i32 @llvm.bswap.i32(i32 [[TMP6]])
-; X32-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]]
-; X32-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X32-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1
+; X32-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1
+; X32-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X32-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X32-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X32-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
; X32: loadbb1:
-; X32-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 3
-; X32-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 3
-; X32-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP10]], align 1
-; X32-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP11]], align 1
-; X32-NEXT: [[TMP16]] = call i32 @llvm.bswap.i32(i32 [[TMP14]])
-; X32-NEXT: [[TMP17]] = call i32 @llvm.bswap.i32(i32 [[TMP15]])
-; X32-NEXT: [[TMP18:%.*]] = icmp eq i32 [[TMP16]], [[TMP17]]
-; X32-NEXT: br i1 [[TMP18]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X32-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X32-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X32-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X32-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X32-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X32-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X32-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X32-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
; X32: endblock:
; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
; X32-NEXT: ret i32 [[PHI_RES]]
@@ -184,27 +184,27 @@ define i32 @cmp8(ptr nocapture readonly %x, ptr nocapture readonly %y) {
; X32-LABEL: @cmp8(
; X32-NEXT: br label [[LOADBB:%.*]]
; X32: res_block:
-; X32-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP16:%.*]], [[LOADBB1:%.*]] ]
-; X32-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP8:%.*]], [[LOADBB]] ], [ [[TMP17:%.*]], [[LOADBB1]] ]
+; X32-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X32-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
; X32-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
; X32-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
; X32-NEXT: br label [[ENDBLOCK:%.*]]
; X32: loadbb:
-; X32-NEXT: [[TMP5:%.*]] = load i32, ptr [[X:%.*]], align 1
-; X32-NEXT: [[TMP6:%.*]] = load i32, ptr [[Y:%.*]], align 1
-; X32-NEXT: [[TMP7]] = call i32 @llvm.bswap.i32(i32 [[TMP5]])
-; X32-NEXT: [[TMP8]] = call i32 @llvm.bswap.i32(i32 [[TMP6]])
-; X32-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]]
-; X32-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X32-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1
+; X32-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1
+; X32-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X32-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X32-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X32-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
; X32: loadbb1:
-; X32-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 4
-; X32-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 4
-; X32-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP10]], align 1
-; X32-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP11]], align 1
-; X32-NEXT: [[TMP16]] = call i32 @llvm.bswap.i32(i32 [[TMP14]])
-; X32-NEXT: [[TMP17]] = call i32 @llvm.bswap.i32(i32 [[TMP15]])
-; X32-NEXT: [[TMP18:%.*]] = icmp eq i32 [[TMP16]], [[TMP17]]
-; X32-NEXT: br i1 [[TMP18]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X32-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X32-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X32-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X32-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X32-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X32-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X32-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X32-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
; X32: endblock:
; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
; X32-NEXT: ret i32 [[PHI_RES]]
@@ -287,11 +287,11 @@ define i32 @cmp16(ptr nocapture readonly %x, ptr nocapture readonly %y) {
define i32 @cmp_eq2(ptr nocapture readonly %x, ptr nocapture readonly %y) {
; X32-LABEL: @cmp_eq2(
-; X32-NEXT: [[TMP3:%.*]] = load i16, ptr [[X:%.*]], align 1
-; X32-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y:%.*]], align 1
-; X32-NEXT: [[TMP5:%.*]] = icmp ne i16 [[TMP3]], [[TMP4]]
-; X32-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32
-; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0
+; X32-NEXT: [[TMP1:%.*]] = load i16, ptr [[X:%.*]], align 1
+; X32-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y:%.*]], align 1
+; X32-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]]
+; X32-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0
; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X32-NEXT: ret i32 [[CONV]]
;
@@ -303,20 +303,20 @@ define i32 @cmp_eq2(ptr nocapture readonly %x, ptr nocapture readonly %y) {
define i32 @cmp_eq3(ptr nocapture readonly %x, ptr nocapture readonly %y) {
; X32-LABEL: @cmp_eq3(
-; X32-NEXT: [[TMP3:%.*]] = load i16, ptr [[X:%.*]], align 1
-; X32-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y:%.*]], align 1
-; X32-NEXT: [[TMP5:%.*]] = xor i16 [[TMP3]], [[TMP4]]
-; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 2
-; X32-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 2
-; X32-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP6]], align 1
-; X32-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1
-; X32-NEXT: [[TMP10:%.*]] = zext i8 [[TMP8]] to i16
-; X32-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i16
-; X32-NEXT: [[TMP12:%.*]] = xor i16 [[TMP10]], [[TMP11]]
-; X32-NEXT: [[TMP13:%.*]] = or i16 [[TMP5]], [[TMP12]]
-; X32-NEXT: [[TMP14:%.*]] = icmp ne i16 [[TMP13]], 0
-; X32-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
-; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
+; X32-NEXT: [[TMP1:%.*]] = load i16, ptr [[X:%.*]], align 1
+; X32-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y:%.*]], align 1
+; X32-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]]
+; X32-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X32-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X32-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X32-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X32-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16
+; X32-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16
+; X32-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]]
+; X32-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]]
+; X32-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0
+; X32-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X32-NEXT: ret i32 [[CONV]]
;
@@ -328,11 +328,11 @@ define i32 @cmp_eq3(ptr nocapture readonly %x, ptr nocapture readonly %y) {
define i32 @cmp_eq4(ptr nocapture readonly %x, ptr nocapture readonly %y) {
; X32-LABEL: @cmp_eq4(
-; X32-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1
-; X32-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1
-; X32-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
-; X32-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32
-; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0
+; X32-NEXT: [[TMP1:%.*]] = load i32, ptr [[X:%.*]], align 1
+; X32-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y:%.*]], align 1
+; X32-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
+; X32-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0
; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X32-NEXT: ret i32 [[CONV]]
;
@@ -344,20 +344,20 @@ define i32 @cmp_eq4(ptr nocapture readonly %x, ptr nocapture readonly %y) {
define i32 @cmp_eq5(ptr nocapture readonly %x, ptr nocapture readonly %y) {
; X32-LABEL: @cmp_eq5(
-; X32-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1
-; X32-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1
-; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
-; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 4
-; X32-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 4
-; X32-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP6]], align 1
-; X32-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1
-; X32-NEXT: [[TMP10:%.*]] = zext i8 [[TMP8]] to i32
-; X32-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i32
-; X32-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]
-; X32-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]
-; X32-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
-; X32-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
-; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
+; X32-NEXT: [[TMP1:%.*]] = load i32, ptr [[X:%.*]], align 1
+; X32-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y:%.*]], align 1
+; X32-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X32-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X32-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X32-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X32-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X32-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32
+; X32-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32
+; X32-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]]
+; X32-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]]
+; X32-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+; X32-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X32-NEXT: ret i32 [[CONV]]
;
@@ -369,20 +369,20 @@ define i32 @cmp_eq5(ptr nocapture readonly %x, ptr nocapture readonly %y) {
define i32 @cmp_eq6(ptr nocapture readonly %x, ptr nocapture readonly %y) {
; X32-LABEL: @cmp_eq6(
-; X32-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1
-; X32-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1
-; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
-; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 4
-; X32-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 4
-; X32-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP6]], align 1
-; X32-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP7]], align 1
-; X32-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i32
-; X32-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i32
-; X32-NEXT: [[TMP14:%.*]] = xor i32 [[TMP12]], [[TMP13]]
-; X32-NEXT: [[TMP15:%.*]] = or i32 [[TMP5]], [[TMP14]]
-; X32-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
-; X32-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
-; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
+; X32-NEXT: [[TMP1:%.*]] = load i32, ptr [[X:%.*]], align 1
+; X32-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y:%.*]], align 1
+; X32-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X32-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X32-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X32-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1
+; X32-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1
+; X32-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i32
+; X32-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i32
+; X32-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]]
+; X32-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]]
+; X32-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+; X32-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X32-NEXT: ret i32 [[CONV]]
;
@@ -394,20 +394,20 @@ define i32 @cmp_eq6(ptr nocapture readonly %x, ptr nocapture readonly %y) {
define i32 @cmp_eq6_align4(ptr nocapture readonly align 4 %x, ptr nocapture readonly align 4 %y) {
; X32-LABEL: @cmp_eq6_align4(
-; X32-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 4
-; X32-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 4
-; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
-; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 4
-; X32-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 4
-; X32-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP6]], align 4
-; X32-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP7]], align 4
-; X32-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i32
-; X32-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i32
-; X32-NEXT: [[TMP14:%.*]] = xor i32 [[TMP12]], [[TMP13]]
-; X32-NEXT: [[TMP15:%.*]] = or i32 [[TMP5]], [[TMP14]]
-; X32-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
-; X32-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
-; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
+; X32-NEXT: [[TMP1:%.*]] = load i32, ptr [[X:%.*]], align 4
+; X32-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y:%.*]], align 4
+; X32-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X32-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X32-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X32-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 4
+; X32-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 4
+; X32-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i32
+; X32-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i32
+; X32-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]]
+; X32-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]]
+; X32-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+; X32-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X32-NEXT: ret i32 [[CONV]]
;
@@ -419,18 +419,18 @@ define i32 @cmp_eq6_align4(ptr nocapture readonly align 4 %x, ptr nocapture read
define i32 @cmp_eq7(ptr nocapture readonly %x, ptr nocapture readonly %y) {
; X32-LABEL: @cmp_eq7(
-; X32-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1
-; X32-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1
-; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
-; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 3
-; X32-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 3
-; X32-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 1
-; X32-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP7]], align 1
-; X32-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]
-; X32-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]
-; X32-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
-; X32-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
-; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
+; X32-NEXT: [[TMP1:%.*]] = load i32, ptr [[X:%.*]], align 1
+; X32-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y:%.*]], align 1
+; X32-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X32-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X32-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X32-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X32-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X32-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X32-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X32-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
+; X32-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X32-NEXT: ret i32 [[CONV]]
;
@@ -442,18 +442,18 @@ define i32 @cmp_eq7(ptr nocapture readonly %x, ptr nocapture readonly %y) {
define i32 @cmp_eq8(ptr nocapture readonly %x, ptr nocapture readonly %y) {
; X32-LABEL: @cmp_eq8(
-; X32-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1
-; X32-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1
-; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
-; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 4
-; X32-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 4
-; X32-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 1
-; X32-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP7]], align 1
-; X32-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]
-; X32-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]
-; X32-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
-; X32-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
-; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
+; X32-NEXT: [[TMP1:%.*]] = load i32, ptr [[X:%.*]], align 1
+; X32-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y:%.*]], align 1
+; X32-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X32-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X32-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X32-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X32-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X32-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X32-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X32-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
+; X32-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X32-NEXT: ret i32 [[CONV]]
;
diff --git a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll
index 2594f53971393d4..57a242a77509aa6 100644
--- a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll
+++ b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll
@@ -6,14 +6,14 @@ declare i32 @memcmp(ptr nocapture, ptr nocapture, i64)
define i32 @cmp2(ptr nocapture readonly %x, ptr nocapture readonly %y) {
; X64-LABEL: @cmp2(
-; X64-NEXT: [[TMP3:%.*]] = load i16, ptr [[X:%.*]], align 1
-; X64-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y:%.*]], align 1
-; X64-NEXT: [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
-; X64-NEXT: [[TMP6:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
-; X64-NEXT: [[TMP7:%.*]] = zext i16 [[TMP5]] to i32
-; X64-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i32
-; X64-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
-; X64-NEXT: ret i32 [[TMP9]]
+; X64-NEXT: [[TMP1:%.*]] = load i16, ptr [[X:%.*]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y:%.*]], align 1
+; X64-NEXT: [[TMP3:%.*]] = zext i16 [[TMP1]] to i32
+; X64-NEXT: [[TMP4:%.*]] = zext i16 [[TMP2]] to i32
+; X64-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-NEXT: [[TMP6:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-NEXT: ret i32 [[TMP7]]
;
%call = tail call i32 @memcmp(ptr %x, ptr %y, i64 2)
ret i32 %call
@@ -21,14 +21,14 @@ define i32 @cmp2(ptr nocapture readonly %x, ptr nocapture readonly %y) {
define i32 @cmp2_align2(ptr nocapture readonly align 2 %x, ptr nocapture readonly align 2 %y) {
; X64-LABEL: @cmp2_align2(
-; X64-NEXT: [[TMP3:%.*]] = load i16, ptr [[X:%.*]], align 2
-; X64-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y:%.*]], align 2
-; X64-NEXT: [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
-; X64-NEXT: [[TMP6:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
-; X64-NEXT: [[TMP7:%.*]] = zext i16 [[TMP5]] to i32
-; X64-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i32
-; X64-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
-; X64-NEXT: ret i32 [[TMP9]]
+; X64-NEXT: [[TMP1:%.*]] = load i16, ptr [[X:%.*]], align 2
+; X64-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y:%.*]], align 2
+; X64-NEXT: [[TMP3:%.*]] = zext i16 [[TMP1]] to i32
+; X64-NEXT: [[TMP4:%.*]] = zext i16 [[TMP2]] to i32
+; X64-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-NEXT: [[TMP6:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X64-NEXT: ret i32 [[TMP7]]
;
%call = tail call i32 @memcmp(ptr %x, ptr %y, i64 2)
ret i32 %call
@@ -38,27 +38,27 @@ define i32 @cmp3(ptr nocapture readonly %x, ptr nocapture readonly %y) {
; X64-LABEL: @cmp3(
; X64-NEXT: br label [[LOADBB:%.*]]
; X64: res_block:
-; X64-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP7:%.*]], [[TMP8:%.*]]
+; X64-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]]
; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
; X64-NEXT: br label [[ENDBLOCK:%.*]]
; X64: loadbb:
-; X64-NEXT: [[TMP5:%.*]] = load i16, ptr [[X:%.*]], align 1
-; X64-NEXT: [[TMP6:%.*]] = load i16, ptr [[Y:%.*]], align 1
-; X64-NEXT: [[TMP7]] = call i16 @llvm.bswap.i16(i16 [[TMP5]])
-; X64-NEXT: [[TMP8]] = call i16 @llvm.bswap.i16(i16 [[TMP6]])
-; X64-NEXT: [[TMP9:%.*]] = icmp eq i16 [[TMP7]], [[TMP8]]
-; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-NEXT: [[TMP3:%.*]] = load i16, ptr [[X:%.*]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y:%.*]], align 1
+; X64-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
; X64: loadbb1:
-; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 2
-; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 2
-; X64-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP10]], align 1
-; X64-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP11]], align 1
-; X64-NEXT: [[TMP14:%.*]] = zext i8 [[TMP12]] to i32
-; X64-NEXT: [[TMP15:%.*]] = zext i8 [[TMP13]] to i32
-; X64-NEXT: [[TMP16:%.*]] = sub i32 [[TMP14]], [[TMP15]]
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X64-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
; X64-NEXT: br label [[ENDBLOCK]]
; X64: endblock:
-; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP16]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
; X64-NEXT: ret i32 [[PHI_RES]]
;
%call = tail call i32 @memcmp(ptr %x, ptr %y, i64 3)
@@ -67,16 +67,16 @@ define i32 @cmp3(ptr nocapture readonly %x, ptr nocapture readonly %y) {
define i32 @cmp4(ptr nocapture readonly %x, ptr nocapture readonly %y) {
; X64-LABEL: @cmp4(
-; X64-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1
-; X64-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1
-; X64-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
-; X64-NEXT: [[TMP6:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
-; X64-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]]
-; X64-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP5]], [[TMP6]]
-; X64-NEXT: [[TMP9:%.*]] = zext i1 [[TMP7]] to i32
-; X64-NEXT: [[TMP10:%.*]] = zext i1 [[TMP8]] to i32
-; X64-NEXT: [[TMP11:%.*]] = sub i32 [[TMP9]], [[TMP10]]
-; X64-NEXT: ret i32 [[TMP11]]
+; X64-NEXT: [[TMP1:%.*]] = load i32, ptr [[X:%.*]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y:%.*]], align 1
+; X64-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; X64-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]]
+; X64-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]]
+; X64-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X64-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X64-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X64-NEXT: ret i32 [[TMP9]]
;
%call = tail call i32 @memcmp(ptr %x, ptr %y, i64 4)
ret i32 %call
@@ -86,27 +86,27 @@ define i32 @cmp5(ptr nocapture readonly %x, ptr nocapture readonly %y) {
; X64-LABEL: @cmp5(
; X64-NEXT: br label [[LOADBB:%.*]]
; X64: res_block:
-; X64-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP7:%.*]], [[TMP8:%.*]]
+; X64-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]]
; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
; X64-NEXT: br label [[ENDBLOCK:%.*]]
; X64: loadbb:
-; X64-NEXT: [[TMP5:%.*]] = load i32, ptr [[X:%.*]], align 1
-; X64-NEXT: [[TMP6:%.*]] = load i32, ptr [[Y:%.*]], align 1
-; X64-NEXT: [[TMP7]] = call i32 @llvm.bswap.i32(i32 [[TMP5]])
-; X64-NEXT: [[TMP8]] = call i32 @llvm.bswap.i32(i32 [[TMP6]])
-; X64-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]]
-; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1
+; X64-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
; X64: loadbb1:
-; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 4
-; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 4
-; X64-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP10]], align 1
-; X64-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP11]], align 1
-; X64-NEXT: [[TMP14:%.*]] = zext i8 [[TMP12]] to i32
-; X64-NEXT: [[TMP15:%.*]] = zext i8 [[TMP13]] to i32
-; X64-NEXT: [[TMP16:%.*]] = sub i32 [[TMP14]], [[TMP15]]
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
; X64-NEXT: br label [[ENDBLOCK]]
; X64: endblock:
-; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP16]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
; X64-NEXT: ret i32 [[PHI_RES]]
;
%call = tail call i32 @memcmp(ptr %x, ptr %y, i64 5)
@@ -117,29 +117,29 @@ define i32 @cmp6(ptr nocapture readonly %x, ptr nocapture readonly %y) {
; X64-LABEL: @cmp6(
; X64-NEXT: br label [[LOADBB:%.*]]
; X64: res_block:
-; X64-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP18:%.*]], [[LOADBB1:%.*]] ]
-; X64-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP8:%.*]], [[LOADBB]] ], [ [[TMP19:%.*]], [[LOADBB1]] ]
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ]
; X64-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
; X64-NEXT: br label [[ENDBLOCK:%.*]]
; X64: loadbb:
-; X64-NEXT: [[TMP5:%.*]] = load i32, ptr [[X:%.*]], align 1
-; X64-NEXT: [[TMP6:%.*]] = load i32, ptr [[Y:%.*]], align 1
-; X64-NEXT: [[TMP7]] = call i32 @llvm.bswap.i32(i32 [[TMP5]])
-; X64-NEXT: [[TMP8]] = call i32 @llvm.bswap.i32(i32 [[TMP6]])
-; X64-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]]
-; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1
+; X64-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
; X64: loadbb1:
-; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 4
-; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 4
-; X64-NEXT: [[TMP14:%.*]] = load i16, ptr [[TMP10]], align 1
-; X64-NEXT: [[TMP15:%.*]] = load i16, ptr [[TMP11]], align 1
-; X64-NEXT: [[TMP16:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP14]])
-; X64-NEXT: [[TMP17:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP15]])
-; X64-NEXT: [[TMP18]] = zext i16 [[TMP16]] to i32
-; X64-NEXT: [[TMP19]] = zext i16 [[TMP17]] to i32
-; X64-NEXT: [[TMP20:%.*]] = icmp eq i32 [[TMP18]], [[TMP19]]
-; X64-NEXT: br i1 [[TMP20]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i32
+; X64-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i32
+; X64-NEXT: [[TMP14]] = call i32 @llvm.bswap.i32(i32 [[TMP12]])
+; X64-NEXT: [[TMP15]] = call i32 @llvm.bswap.i32(i32 [[TMP13]])
+; X64-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP14]], [[TMP15]]
+; X64-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]]
; X64: endblock:
; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
; X64-NEXT: ret i32 [[PHI_RES]]
@@ -152,27 +152,27 @@ define i32 @cmp7(ptr nocapture readonly %x, ptr nocapture readonly %y) {
; X64-LABEL: @cmp7(
; X64-NEXT: br label [[LOADBB:%.*]]
; X64: res_block:
-; X64-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP16:%.*]], [[LOADBB1:%.*]] ]
-; X64-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP8:%.*]], [[LOADBB]] ], [ [[TMP17:%.*]], [[LOADBB1]] ]
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
; X64-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
; X64-NEXT: br label [[ENDBLOCK:%.*]]
; X64: loadbb:
-; X64-NEXT: [[TMP5:%.*]] = load i32, ptr [[X:%.*]], align 1
-; X64-NEXT: [[TMP6:%.*]] = load i32, ptr [[Y:%.*]], align 1
-; X64-NEXT: [[TMP7]] = call i32 @llvm.bswap.i32(i32 [[TMP5]])
-; X64-NEXT: [[TMP8]] = call i32 @llvm.bswap.i32(i32 [[TMP6]])
-; X64-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]]
-; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1
+; X64-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
; X64: loadbb1:
-; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 3
-; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 3
-; X64-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP10]], align 1
-; X64-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP11]], align 1
-; X64-NEXT: [[TMP16]] = call i32 @llvm.bswap.i32(i32 [[TMP14]])
-; X64-NEXT: [[TMP17]] = call i32 @llvm.bswap.i32(i32 [[TMP15]])
-; X64-NEXT: [[TMP18:%.*]] = icmp eq i32 [[TMP16]], [[TMP17]]
-; X64-NEXT: br i1 [[TMP18]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
+; X64-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
+; X64-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
+; X64-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
; X64: endblock:
; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
; X64-NEXT: ret i32 [[PHI_RES]]
@@ -183,16 +183,16 @@ define i32 @cmp7(ptr nocapture readonly %x, ptr nocapture readonly %y) {
define i32 @cmp8(ptr nocapture readonly %x, ptr nocapture readonly %y) {
; X64-LABEL: @cmp8(
-; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1
-; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1
-; X64-NEXT: [[TMP5:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
-; X64-NEXT: [[TMP6:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
-; X64-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP5]], [[TMP6]]
-; X64-NEXT: [[TMP8:%.*]] = icmp ult i64 [[TMP5]], [[TMP6]]
-; X64-NEXT: [[TMP9:%.*]] = zext i1 [[TMP7]] to i32
-; X64-NEXT: [[TMP10:%.*]] = zext i1 [[TMP8]] to i32
-; X64-NEXT: [[TMP11:%.*]] = sub i32 [[TMP9]], [[TMP10]]
-; X64-NEXT: ret i32 [[TMP11]]
+; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X:%.*]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y:%.*]], align 1
+; X64-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]])
+; X64-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; X64-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]]
+; X64-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]]
+; X64-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32
+; X64-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32
+; X64-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; X64-NEXT: ret i32 [[TMP9]]
;
%call = tail call i32 @memcmp(ptr %x, ptr %y, i64 8)
ret i32 %call
@@ -202,27 +202,27 @@ define i32 @cmp9(ptr nocapture readonly %x, ptr nocapture readonly %y) {
; X64-LABEL: @cmp9(
; X64-NEXT: br label [[LOADBB:%.*]]
; X64: res_block:
-; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP7:%.*]], [[TMP8:%.*]]
+; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP5:%.*]], [[TMP6:%.*]]
; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
; X64-NEXT: br label [[ENDBLOCK:%.*]]
; X64: loadbb:
-; X64-NEXT: [[TMP5:%.*]] = load i64, ptr [[X:%.*]], align 1
-; X64-NEXT: [[TMP6:%.*]] = load i64, ptr [[Y:%.*]], align 1
-; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]])
-; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]])
-; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]]
-; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1
+; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
; X64: loadbb1:
-; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 8
-; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 8
-; X64-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP10]], align 1
-; X64-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP11]], align 1
-; X64-NEXT: [[TMP14:%.*]] = zext i8 [[TMP12]] to i32
-; X64-NEXT: [[TMP15:%.*]] = zext i8 [[TMP13]] to i32
-; X64-NEXT: [[TMP16:%.*]] = sub i32 [[TMP14]], [[TMP15]]
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32
+; X64-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]]
; X64-NEXT: br label [[ENDBLOCK]]
; X64: endblock:
-; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP16]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
; X64-NEXT: ret i32 [[PHI_RES]]
;
%call = tail call i32 @memcmp(ptr %x, ptr %y, i64 9)
@@ -233,29 +233,29 @@ define i32 @cmp10(ptr nocapture readonly %x, ptr nocapture readonly %y) {
; X64-LABEL: @cmp10(
; X64-NEXT: br label [[LOADBB:%.*]]
; X64: res_block:
-; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP18:%.*]], [[LOADBB1:%.*]] ]
-; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP8:%.*]], [[LOADBB]] ], [ [[TMP19:%.*]], [[LOADBB1]] ]
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ]
; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
; X64-NEXT: br label [[ENDBLOCK:%.*]]
; X64: loadbb:
-; X64-NEXT: [[TMP5:%.*]] = load i64, ptr [[X:%.*]], align 1
-; X64-NEXT: [[TMP6:%.*]] = load i64, ptr [[Y:%.*]], align 1
-; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]])
-; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]])
-; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]]
-; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1
+; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
; X64: loadbb1:
-; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 8
-; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 8
-; X64-NEXT: [[TMP14:%.*]] = load i16, ptr [[TMP10]], align 1
-; X64-NEXT: [[TMP15:%.*]] = load i16, ptr [[TMP11]], align 1
-; X64-NEXT: [[TMP16:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP14]])
-; X64-NEXT: [[TMP17:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP15]])
-; X64-NEXT: [[TMP18]] = zext i16 [[TMP16]] to i64
-; X64-NEXT: [[TMP19]] = zext i16 [[TMP17]] to i64
-; X64-NEXT: [[TMP20:%.*]] = icmp eq i64 [[TMP18]], [[TMP19]]
-; X64-NEXT: br i1 [[TMP20]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i64
+; X64-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i64
+; X64-NEXT: [[TMP14]] = call i64 @llvm.bswap.i64(i64 [[TMP12]])
+; X64-NEXT: [[TMP15]] = call i64 @llvm.bswap.i64(i64 [[TMP13]])
+; X64-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]]
+; X64-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]]
; X64: endblock:
; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
; X64-NEXT: ret i32 [[PHI_RES]]
@@ -268,27 +268,27 @@ define i32 @cmp11(ptr nocapture readonly %x, ptr nocapture readonly %y) {
; X64-LABEL: @cmp11(
; X64-NEXT: br label [[LOADBB:%.*]]
; X64: res_block:
-; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP16:%.*]], [[LOADBB1:%.*]] ]
-; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP8:%.*]], [[LOADBB]] ], [ [[TMP17:%.*]], [[LOADBB1]] ]
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
; X64-NEXT: br label [[ENDBLOCK:%.*]]
; X64: loadbb:
-; X64-NEXT: [[TMP5:%.*]] = load i64, ptr [[X:%.*]], align 1
-; X64-NEXT: [[TMP6:%.*]] = load i64, ptr [[Y:%.*]], align 1
-; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]])
-; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]])
-; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]]
-; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1
+; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
; X64: loadbb1:
-; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 3
-; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 3
-; X64-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP10]], align 1
-; X64-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 1
-; X64-NEXT: [[TMP16]] = call i64 @llvm.bswap.i64(i64 [[TMP14]])
-; X64-NEXT: [[TMP17]] = call i64 @llvm.bswap.i64(i64 [[TMP15]])
-; X64-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP16]], [[TMP17]]
-; X64-NEXT: br i1 [[TMP18]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
; X64: endblock:
; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
; X64-NEXT: ret i32 [[PHI_RES]]
@@ -301,29 +301,29 @@ define i32 @cmp12(ptr nocapture readonly %x, ptr nocapture readonly %y) {
; X64-LABEL: @cmp12(
; X64-NEXT: br label [[LOADBB:%.*]]
; X64: res_block:
-; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP18:%.*]], [[LOADBB1:%.*]] ]
-; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP8:%.*]], [[LOADBB]] ], [ [[TMP19:%.*]], [[LOADBB1]] ]
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ]
; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
; X64-NEXT: br label [[ENDBLOCK:%.*]]
; X64: loadbb:
-; X64-NEXT: [[TMP5:%.*]] = load i64, ptr [[X:%.*]], align 1
-; X64-NEXT: [[TMP6:%.*]] = load i64, ptr [[Y:%.*]], align 1
-; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]])
-; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]])
-; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]]
-; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1
+; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
; X64: loadbb1:
-; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 8
-; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 8
-; X64-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP10]], align 1
-; X64-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP11]], align 1
-; X64-NEXT: [[TMP16:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP14]])
-; X64-NEXT: [[TMP17:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP15]])
-; X64-NEXT: [[TMP18]] = zext i32 [[TMP16]] to i64
-; X64-NEXT: [[TMP19]] = zext i32 [[TMP17]] to i64
-; X64-NEXT: [[TMP20:%.*]] = icmp eq i64 [[TMP18]], [[TMP19]]
-; X64-NEXT: br i1 [[TMP20]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12:%.*]] = zext i32 [[TMP10]] to i64
+; X64-NEXT: [[TMP13:%.*]] = zext i32 [[TMP11]] to i64
+; X64-NEXT: [[TMP14]] = call i64 @llvm.bswap.i64(i64 [[TMP12]])
+; X64-NEXT: [[TMP15]] = call i64 @llvm.bswap.i64(i64 [[TMP13]])
+; X64-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]]
+; X64-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]]
; X64: endblock:
; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
; X64-NEXT: ret i32 [[PHI_RES]]
@@ -336,27 +336,27 @@ define i32 @cmp13(ptr nocapture readonly %x, ptr nocapture readonly %y) {
; X64-LABEL: @cmp13(
; X64-NEXT: br label [[LOADBB:%.*]]
; X64: res_block:
-; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP16:%.*]], [[LOADBB1:%.*]] ]
-; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP8:%.*]], [[LOADBB]] ], [ [[TMP17:%.*]], [[LOADBB1]] ]
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
; X64-NEXT: br label [[ENDBLOCK:%.*]]
; X64: loadbb:
-; X64-NEXT: [[TMP5:%.*]] = load i64, ptr [[X:%.*]], align 1
-; X64-NEXT: [[TMP6:%.*]] = load i64, ptr [[Y:%.*]], align 1
-; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]])
-; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]])
-; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]]
-; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1
+; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
; X64: loadbb1:
-; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 5
-; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 5
-; X64-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP10]], align 1
-; X64-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 1
-; X64-NEXT: [[TMP16]] = call i64 @llvm.bswap.i64(i64 [[TMP14]])
-; X64-NEXT: [[TMP17]] = call i64 @llvm.bswap.i64(i64 [[TMP15]])
-; X64-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP16]], [[TMP17]]
-; X64-NEXT: br i1 [[TMP18]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 5
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 5
+; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
; X64: endblock:
; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
; X64-NEXT: ret i32 [[PHI_RES]]
@@ -369,27 +369,27 @@ define i32 @cmp14(ptr nocapture readonly %x, ptr nocapture readonly %y) {
; X64-LABEL: @cmp14(
; X64-NEXT: br label [[LOADBB:%.*]]
; X64: res_block:
-; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP16:%.*]], [[LOADBB1:%.*]] ]
-; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP8:%.*]], [[LOADBB]] ], [ [[TMP17:%.*]], [[LOADBB1]] ]
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
; X64-NEXT: br label [[ENDBLOCK:%.*]]
; X64: loadbb:
-; X64-NEXT: [[TMP5:%.*]] = load i64, ptr [[X:%.*]], align 1
-; X64-NEXT: [[TMP6:%.*]] = load i64, ptr [[Y:%.*]], align 1
-; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]])
-; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]])
-; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]]
-; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1
+; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
; X64: loadbb1:
-; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 6
-; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 6
-; X64-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP10]], align 1
-; X64-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 1
-; X64-NEXT: [[TMP16]] = call i64 @llvm.bswap.i64(i64 [[TMP14]])
-; X64-NEXT: [[TMP17]] = call i64 @llvm.bswap.i64(i64 [[TMP15]])
-; X64-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP16]], [[TMP17]]
-; X64-NEXT: br i1 [[TMP18]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 6
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 6
+; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
; X64: endblock:
; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
; X64-NEXT: ret i32 [[PHI_RES]]
@@ -402,27 +402,27 @@ define i32 @cmp15(ptr nocapture readonly %x, ptr nocapture readonly %y) {
; X64-LABEL: @cmp15(
; X64-NEXT: br label [[LOADBB:%.*]]
; X64: res_block:
-; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP16:%.*]], [[LOADBB1:%.*]] ]
-; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP8:%.*]], [[LOADBB]] ], [ [[TMP17:%.*]], [[LOADBB1]] ]
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
; X64-NEXT: br label [[ENDBLOCK:%.*]]
; X64: loadbb:
-; X64-NEXT: [[TMP5:%.*]] = load i64, ptr [[X:%.*]], align 1
-; X64-NEXT: [[TMP6:%.*]] = load i64, ptr [[Y:%.*]], align 1
-; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]])
-; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]])
-; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]]
-; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1
+; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
; X64: loadbb1:
-; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 7
-; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 7
-; X64-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP10]], align 1
-; X64-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 1
-; X64-NEXT: [[TMP16]] = call i64 @llvm.bswap.i64(i64 [[TMP14]])
-; X64-NEXT: [[TMP17]] = call i64 @llvm.bswap.i64(i64 [[TMP15]])
-; X64-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP16]], [[TMP17]]
-; X64-NEXT: br i1 [[TMP18]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 7
+; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
; X64: endblock:
; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
; X64-NEXT: ret i32 [[PHI_RES]]
@@ -435,27 +435,27 @@ define i32 @cmp16(ptr nocapture readonly %x, ptr nocapture readonly %y) {
; X64-LABEL: @cmp16(
; X64-NEXT: br label [[LOADBB:%.*]]
; X64: res_block:
-; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP16:%.*]], [[LOADBB1:%.*]] ]
-; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP8:%.*]], [[LOADBB]] ], [ [[TMP17:%.*]], [[LOADBB1]] ]
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
; X64-NEXT: br label [[ENDBLOCK:%.*]]
; X64: loadbb:
-; X64-NEXT: [[TMP5:%.*]] = load i64, ptr [[X:%.*]], align 1
-; X64-NEXT: [[TMP6:%.*]] = load i64, ptr [[Y:%.*]], align 1
-; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]])
-; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]])
-; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]]
-; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1
+; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]]
+; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
; X64: loadbb1:
-; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 8
-; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 8
-; X64-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP10]], align 1
-; X64-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 1
-; X64-NEXT: [[TMP16]] = call i64 @llvm.bswap.i64(i64 [[TMP14]])
-; X64-NEXT: [[TMP17]] = call i64 @llvm.bswap.i64(i64 [[TMP15]])
-; X64-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP16]], [[TMP17]]
-; X64-NEXT: br i1 [[TMP18]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1
+; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1
+; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]])
+; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]])
+; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]]
+; X64-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
; X64: endblock:
; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
; X64-NEXT: ret i32 [[PHI_RES]]
@@ -466,11 +466,11 @@ define i32 @cmp16(ptr nocapture readonly %x, ptr nocapture readonly %y) {
define i32 @cmp_eq2(ptr nocapture readonly %x, ptr nocapture readonly %y) {
; X64-LABEL: @cmp_eq2(
-; X64-NEXT: [[TMP3:%.*]] = load i16, ptr [[X:%.*]], align 1
-; X64-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y:%.*]], align 1
-; X64-NEXT: [[TMP5:%.*]] = icmp ne i16 [[TMP3]], [[TMP4]]
-; X64-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32
-; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0
+; X64-NEXT: [[TMP1:%.*]] = load i16, ptr [[X:%.*]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y:%.*]], align 1
+; X64-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0
; X64-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X64-NEXT: ret i32 [[CONV]]
;
@@ -486,17 +486,17 @@ define i32 @cmp_eq3(ptr nocapture readonly %x, ptr nocapture readonly %y) {
; X64_1LD: res_block:
; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
; X64_1LD: loadbb:
-; X64_1LD-NEXT: [[TMP3:%.*]] = load i16, ptr [[X:%.*]], align 1
-; X64_1LD-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y:%.*]], align 1
-; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i16 [[TMP3]], [[TMP4]]
-; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; X64_1LD-NEXT: [[TMP1:%.*]] = load i16, ptr [[X:%.*]], align 1
+; X64_1LD-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y:%.*]], align 1
+; X64_1LD-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]]
+; X64_1LD-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
-; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 2
-; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 2
-; X64_1LD-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP6]], align 1
-; X64_1LD-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1
-; X64_1LD-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP8]], [[TMP9]]
-; X64_1LD-NEXT: br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; X64_1LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X64_1LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X64_1LD-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64_1LD-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64_1LD-NEXT: [[TMP8:%.*]] = icmp ne i8 [[TMP6]], [[TMP7]]
+; X64_1LD-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; X64_1LD: endblock:
; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
@@ -504,20 +504,20 @@ define i32 @cmp_eq3(ptr nocapture readonly %x, ptr nocapture readonly %y) {
; X64_1LD-NEXT: ret i32 [[CONV]]
;
; X64_2LD-LABEL: @cmp_eq3(
-; X64_2LD-NEXT: [[TMP3:%.*]] = load i16, ptr [[X:%.*]], align 1
-; X64_2LD-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y:%.*]], align 1
-; X64_2LD-NEXT: [[TMP5:%.*]] = xor i16 [[TMP3]], [[TMP4]]
-; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 2
-; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 2
-; X64_2LD-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP6]], align 1
-; X64_2LD-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1
-; X64_2LD-NEXT: [[TMP10:%.*]] = zext i8 [[TMP8]] to i16
-; X64_2LD-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i16
-; X64_2LD-NEXT: [[TMP12:%.*]] = xor i16 [[TMP10]], [[TMP11]]
-; X64_2LD-NEXT: [[TMP13:%.*]] = or i16 [[TMP5]], [[TMP12]]
-; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i16 [[TMP13]], 0
-; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
-; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
+; X64_2LD-NEXT: [[TMP1:%.*]] = load i16, ptr [[X:%.*]], align 1
+; X64_2LD-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y:%.*]], align 1
+; X64_2LD-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]]
+; X64_2LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2
+; X64_2LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2
+; X64_2LD-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64_2LD-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64_2LD-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16
+; X64_2LD-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16
+; X64_2LD-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]]
+; X64_2LD-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]]
+; X64_2LD-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0
+; X64_2LD-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X64_2LD-NEXT: ret i32 [[CONV]]
;
@@ -529,11 +529,11 @@ define i32 @cmp_eq3(ptr nocapture readonly %x, ptr nocapture readonly %y) {
define i32 @cmp_eq4(ptr nocapture readonly %x, ptr nocapture readonly %y) {
; X64-LABEL: @cmp_eq4(
-; X64-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1
-; X64-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1
-; X64-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
-; X64-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32
-; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0
+; X64-NEXT: [[TMP1:%.*]] = load i32, ptr [[X:%.*]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y:%.*]], align 1
+; X64-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0
; X64-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X64-NEXT: ret i32 [[CONV]]
;
@@ -549,17 +549,17 @@ define i32 @cmp_eq5(ptr nocapture readonly %x, ptr nocapture readonly %y) {
; X64_1LD: res_block:
; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
; X64_1LD: loadbb:
-; X64_1LD-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1
-; X64_1LD-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1
-; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
-; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; X64_1LD-NEXT: [[TMP1:%.*]] = load i32, ptr [[X:%.*]], align 1
+; X64_1LD-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y:%.*]], align 1
+; X64_1LD-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
+; X64_1LD-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
-; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 4
-; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 4
-; X64_1LD-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP6]], align 1
-; X64_1LD-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1
-; X64_1LD-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP8]], [[TMP9]]
-; X64_1LD-NEXT: br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; X64_1LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64_1LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64_1LD-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64_1LD-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64_1LD-NEXT: [[TMP8:%.*]] = icmp ne i8 [[TMP6]], [[TMP7]]
+; X64_1LD-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; X64_1LD: endblock:
; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
@@ -567,20 +567,20 @@ define i32 @cmp_eq5(ptr nocapture readonly %x, ptr nocapture readonly %y) {
; X64_1LD-NEXT: ret i32 [[CONV]]
;
; X64_2LD-LABEL: @cmp_eq5(
-; X64_2LD-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1
-; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1
-; X64_2LD-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
-; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 4
-; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 4
-; X64_2LD-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP6]], align 1
-; X64_2LD-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1
-; X64_2LD-NEXT: [[TMP10:%.*]] = zext i8 [[TMP8]] to i32
-; X64_2LD-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i32
-; X64_2LD-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]
-; X64_2LD-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]
-; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
-; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
-; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
+; X64_2LD-NEXT: [[TMP1:%.*]] = load i32, ptr [[X:%.*]], align 1
+; X64_2LD-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y:%.*]], align 1
+; X64_2LD-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X64_2LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64_2LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64_2LD-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64_2LD-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64_2LD-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32
+; X64_2LD-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32
+; X64_2LD-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]]
+; X64_2LD-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]]
+; X64_2LD-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+; X64_2LD-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X64_2LD-NEXT: ret i32 [[CONV]]
;
@@ -596,17 +596,17 @@ define i32 @cmp_eq6(ptr nocapture readonly %x, ptr nocapture readonly %y) {
; X64_1LD: res_block:
; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
; X64_1LD: loadbb:
-; X64_1LD-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1
-; X64_1LD-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1
-; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
-; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; X64_1LD-NEXT: [[TMP1:%.*]] = load i32, ptr [[X:%.*]], align 1
+; X64_1LD-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y:%.*]], align 1
+; X64_1LD-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
+; X64_1LD-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
-; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 4
-; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 4
-; X64_1LD-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP6]], align 1
-; X64_1LD-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP7]], align 1
-; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP10]], [[TMP11]]
-; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; X64_1LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64_1LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64_1LD-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1
+; X64_1LD-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1
+; X64_1LD-NEXT: [[TMP8:%.*]] = icmp ne i16 [[TMP6]], [[TMP7]]
+; X64_1LD-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; X64_1LD: endblock:
; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
@@ -614,20 +614,20 @@ define i32 @cmp_eq6(ptr nocapture readonly %x, ptr nocapture readonly %y) {
; X64_1LD-NEXT: ret i32 [[CONV]]
;
; X64_2LD-LABEL: @cmp_eq6(
-; X64_2LD-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1
-; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1
-; X64_2LD-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
-; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 4
-; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 4
-; X64_2LD-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP6]], align 1
-; X64_2LD-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP7]], align 1
-; X64_2LD-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i32
-; X64_2LD-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i32
-; X64_2LD-NEXT: [[TMP14:%.*]] = xor i32 [[TMP12]], [[TMP13]]
-; X64_2LD-NEXT: [[TMP15:%.*]] = or i32 [[TMP5]], [[TMP14]]
-; X64_2LD-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
-; X64_2LD-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
-; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
+; X64_2LD-NEXT: [[TMP1:%.*]] = load i32, ptr [[X:%.*]], align 1
+; X64_2LD-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y:%.*]], align 1
+; X64_2LD-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X64_2LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64_2LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64_2LD-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1
+; X64_2LD-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1
+; X64_2LD-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i32
+; X64_2LD-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i32
+; X64_2LD-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]]
+; X64_2LD-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]]
+; X64_2LD-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+; X64_2LD-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X64_2LD-NEXT: ret i32 [[CONV]]
;
@@ -643,17 +643,17 @@ define i32 @cmp_eq6_align4(ptr nocapture readonly align 4 %x, ptr nocapture read
; X64_1LD: res_block:
; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
; X64_1LD: loadbb:
-; X64_1LD-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 4
-; X64_1LD-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 4
-; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
-; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; X64_1LD-NEXT: [[TMP1:%.*]] = load i32, ptr [[X:%.*]], align 4
+; X64_1LD-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y:%.*]], align 4
+; X64_1LD-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
+; X64_1LD-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
-; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 4
-; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 4
-; X64_1LD-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP6]], align 4
-; X64_1LD-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP7]], align 4
-; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP10]], [[TMP11]]
-; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; X64_1LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64_1LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64_1LD-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 4
+; X64_1LD-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 4
+; X64_1LD-NEXT: [[TMP8:%.*]] = icmp ne i16 [[TMP6]], [[TMP7]]
+; X64_1LD-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; X64_1LD: endblock:
; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
@@ -661,20 +661,20 @@ define i32 @cmp_eq6_align4(ptr nocapture readonly align 4 %x, ptr nocapture read
; X64_1LD-NEXT: ret i32 [[CONV]]
;
; X64_2LD-LABEL: @cmp_eq6_align4(
-; X64_2LD-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 4
-; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 4
-; X64_2LD-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
-; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 4
-; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 4
-; X64_2LD-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP6]], align 4
-; X64_2LD-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP7]], align 4
-; X64_2LD-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i32
-; X64_2LD-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i32
-; X64_2LD-NEXT: [[TMP14:%.*]] = xor i32 [[TMP12]], [[TMP13]]
-; X64_2LD-NEXT: [[TMP15:%.*]] = or i32 [[TMP5]], [[TMP14]]
-; X64_2LD-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
-; X64_2LD-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
-; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
+; X64_2LD-NEXT: [[TMP1:%.*]] = load i32, ptr [[X:%.*]], align 4
+; X64_2LD-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y:%.*]], align 4
+; X64_2LD-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X64_2LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4
+; X64_2LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4
+; X64_2LD-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 4
+; X64_2LD-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 4
+; X64_2LD-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i32
+; X64_2LD-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i32
+; X64_2LD-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]]
+; X64_2LD-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]]
+; X64_2LD-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+; X64_2LD-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X64_2LD-NEXT: ret i32 [[CONV]]
;
@@ -690,17 +690,17 @@ define i32 @cmp_eq7(ptr nocapture readonly %x, ptr nocapture readonly %y) {
; X64_1LD: res_block:
; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
; X64_1LD: loadbb:
-; X64_1LD-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1
-; X64_1LD-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1
-; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
-; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; X64_1LD-NEXT: [[TMP1:%.*]] = load i32, ptr [[X:%.*]], align 1
+; X64_1LD-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y:%.*]], align 1
+; X64_1LD-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
+; X64_1LD-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
-; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 3
-; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 3
-; X64_1LD-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 1
-; X64_1LD-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP7]], align 1
-; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP10]], [[TMP11]]
-; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; X64_1LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64_1LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64_1LD-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X64_1LD-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X64_1LD-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP6]], [[TMP7]]
+; X64_1LD-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; X64_1LD: endblock:
; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
@@ -708,18 +708,18 @@ define i32 @cmp_eq7(ptr nocapture readonly %x, ptr nocapture readonly %y) {
; X64_1LD-NEXT: ret i32 [[CONV]]
;
; X64_2LD-LABEL: @cmp_eq7(
-; X64_2LD-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1
-; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1
-; X64_2LD-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
-; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 3
-; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 3
-; X64_2LD-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 1
-; X64_2LD-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP7]], align 1
-; X64_2LD-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]
-; X64_2LD-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]
-; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
-; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
-; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
+; X64_2LD-NEXT: [[TMP1:%.*]] = load i32, ptr [[X:%.*]], align 1
+; X64_2LD-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y:%.*]], align 1
+; X64_2LD-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
+; X64_2LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64_2LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64_2LD-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X64_2LD-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X64_2LD-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]]
+; X64_2LD-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; X64_2LD-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
+; X64_2LD-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X64_2LD-NEXT: ret i32 [[CONV]]
;
@@ -731,11 +731,11 @@ define i32 @cmp_eq7(ptr nocapture readonly %x, ptr nocapture readonly %y) {
define i32 @cmp_eq8(ptr nocapture readonly %x, ptr nocapture readonly %y) {
; X64-LABEL: @cmp_eq8(
-; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1
-; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1
-; X64-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
-; X64-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32
-; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0
+; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X:%.*]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y:%.*]], align 1
+; X64-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0
; X64-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X64-NEXT: ret i32 [[CONV]]
;
@@ -751,17 +751,17 @@ define i32 @cmp_eq9(ptr nocapture readonly %x, ptr nocapture readonly %y) {
; X64_1LD: res_block:
; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
; X64_1LD: loadbb:
-; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1
-; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1
-; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
-; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; X64_1LD-NEXT: [[TMP1:%.*]] = load i64, ptr [[X:%.*]], align 1
+; X64_1LD-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y:%.*]], align 1
+; X64_1LD-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]]
+; X64_1LD-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
-; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 8
-; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 8
-; X64_1LD-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP6]], align 1
-; X64_1LD-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1
-; X64_1LD-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP8]], [[TMP9]]
-; X64_1LD-NEXT: br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; X64_1LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64_1LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64_1LD-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64_1LD-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64_1LD-NEXT: [[TMP8:%.*]] = icmp ne i8 [[TMP6]], [[TMP7]]
+; X64_1LD-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; X64_1LD: endblock:
; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
@@ -769,20 +769,20 @@ define i32 @cmp_eq9(ptr nocapture readonly %x, ptr nocapture readonly %y) {
; X64_1LD-NEXT: ret i32 [[CONV]]
;
; X64_2LD-LABEL: @cmp_eq9(
-; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1
-; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1
-; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
-; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 8
-; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 8
-; X64_2LD-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP6]], align 1
-; X64_2LD-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1
-; X64_2LD-NEXT: [[TMP10:%.*]] = zext i8 [[TMP8]] to i64
-; X64_2LD-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i64
-; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]
-; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]
-; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
-; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
-; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
+; X64_2LD-NEXT: [[TMP1:%.*]] = load i64, ptr [[X:%.*]], align 1
+; X64_2LD-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y:%.*]], align 1
+; X64_2LD-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64_2LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64_2LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64_2LD-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; X64_2LD-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; X64_2LD-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i64
+; X64_2LD-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i64
+; X64_2LD-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64_2LD-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64_2LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64_2LD-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X64_2LD-NEXT: ret i32 [[CONV]]
;
@@ -798,17 +798,17 @@ define i32 @cmp_eq10(ptr nocapture readonly %x, ptr nocapture readonly %y) {
; X64_1LD: res_block:
; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
; X64_1LD: loadbb:
-; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1
-; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1
-; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
-; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; X64_1LD-NEXT: [[TMP1:%.*]] = load i64, ptr [[X:%.*]], align 1
+; X64_1LD-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y:%.*]], align 1
+; X64_1LD-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]]
+; X64_1LD-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
-; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 8
-; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 8
-; X64_1LD-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP6]], align 1
-; X64_1LD-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP7]], align 1
-; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP10]], [[TMP11]]
-; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; X64_1LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64_1LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64_1LD-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1
+; X64_1LD-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1
+; X64_1LD-NEXT: [[TMP8:%.*]] = icmp ne i16 [[TMP6]], [[TMP7]]
+; X64_1LD-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; X64_1LD: endblock:
; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
@@ -816,20 +816,20 @@ define i32 @cmp_eq10(ptr nocapture readonly %x, ptr nocapture readonly %y) {
; X64_1LD-NEXT: ret i32 [[CONV]]
;
; X64_2LD-LABEL: @cmp_eq10(
-; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1
-; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1
-; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
-; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 8
-; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 8
-; X64_2LD-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP6]], align 1
-; X64_2LD-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP7]], align 1
-; X64_2LD-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i64
-; X64_2LD-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i64
-; X64_2LD-NEXT: [[TMP14:%.*]] = xor i64 [[TMP12]], [[TMP13]]
-; X64_2LD-NEXT: [[TMP15:%.*]] = or i64 [[TMP5]], [[TMP14]]
-; X64_2LD-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP15]], 0
-; X64_2LD-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
-; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
+; X64_2LD-NEXT: [[TMP1:%.*]] = load i64, ptr [[X:%.*]], align 1
+; X64_2LD-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y:%.*]], align 1
+; X64_2LD-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64_2LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64_2LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64_2LD-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1
+; X64_2LD-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1
+; X64_2LD-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i64
+; X64_2LD-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i64
+; X64_2LD-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64_2LD-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64_2LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64_2LD-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X64_2LD-NEXT: ret i32 [[CONV]]
;
@@ -845,17 +845,17 @@ define i32 @cmp_eq11(ptr nocapture readonly %x, ptr nocapture readonly %y) {
; X64_1LD: res_block:
; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
; X64_1LD: loadbb:
-; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1
-; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1
-; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
-; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; X64_1LD-NEXT: [[TMP1:%.*]] = load i64, ptr [[X:%.*]], align 1
+; X64_1LD-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y:%.*]], align 1
+; X64_1LD-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]]
+; X64_1LD-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
-; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 3
-; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 3
-; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP6]], align 1
-; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP7]], align 1
-; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]]
-; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; X64_1LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64_1LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64_1LD-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64_1LD-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64_1LD-NEXT: [[TMP8:%.*]] = icmp ne i64 [[TMP6]], [[TMP7]]
+; X64_1LD-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; X64_1LD: endblock:
; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
@@ -863,18 +863,18 @@ define i32 @cmp_eq11(ptr nocapture readonly %x, ptr nocapture readonly %y) {
; X64_1LD-NEXT: ret i32 [[CONV]]
;
; X64_2LD-LABEL: @cmp_eq11(
-; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1
-; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1
-; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
-; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 3
-; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 3
-; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP6]], align 1
-; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP7]], align 1
-; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]
-; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]
-; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
-; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
-; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
+; X64_2LD-NEXT: [[TMP1:%.*]] = load i64, ptr [[X:%.*]], align 1
+; X64_2LD-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y:%.*]], align 1
+; X64_2LD-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64_2LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3
+; X64_2LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3
+; X64_2LD-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64_2LD-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64_2LD-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64_2LD-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64_2LD-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64_2LD-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X64_2LD-NEXT: ret i32 [[CONV]]
;
@@ -890,17 +890,17 @@ define i32 @cmp_eq12(ptr nocapture readonly %x, ptr nocapture readonly %y) {
; X64_1LD: res_block:
; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
; X64_1LD: loadbb:
-; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1
-; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1
-; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
-; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; X64_1LD-NEXT: [[TMP1:%.*]] = load i64, ptr [[X:%.*]], align 1
+; X64_1LD-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y:%.*]], align 1
+; X64_1LD-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]]
+; X64_1LD-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
-; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 8
-; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 8
-; X64_1LD-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 1
-; X64_1LD-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP7]], align 1
-; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP10]], [[TMP11]]
-; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; X64_1LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64_1LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64_1LD-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X64_1LD-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X64_1LD-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP6]], [[TMP7]]
+; X64_1LD-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; X64_1LD: endblock:
; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
@@ -908,20 +908,20 @@ define i32 @cmp_eq12(ptr nocapture readonly %x, ptr nocapture readonly %y) {
; X64_1LD-NEXT: ret i32 [[CONV]]
;
; X64_2LD-LABEL: @cmp_eq12(
-; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1
-; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1
-; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
-; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 8
-; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 8
-; X64_2LD-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 1
-; X64_2LD-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP7]], align 1
-; X64_2LD-NEXT: [[TMP12:%.*]] = zext i32 [[TMP10]] to i64
-; X64_2LD-NEXT: [[TMP13:%.*]] = zext i32 [[TMP11]] to i64
-; X64_2LD-NEXT: [[TMP14:%.*]] = xor i64 [[TMP12]], [[TMP13]]
-; X64_2LD-NEXT: [[TMP15:%.*]] = or i64 [[TMP5]], [[TMP14]]
-; X64_2LD-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP15]], 0
-; X64_2LD-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
-; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
+; X64_2LD-NEXT: [[TMP1:%.*]] = load i64, ptr [[X:%.*]], align 1
+; X64_2LD-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y:%.*]], align 1
+; X64_2LD-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64_2LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8
+; X64_2LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8
+; X64_2LD-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1
+; X64_2LD-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1
+; X64_2LD-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64
+; X64_2LD-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64
+; X64_2LD-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]]
+; X64_2LD-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]]
+; X64_2LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; X64_2LD-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
+; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X64_2LD-NEXT: ret i32 [[CONV]]
;
@@ -937,17 +937,17 @@ define i32 @cmp_eq13(ptr nocapture readonly %x, ptr nocapture readonly %y) {
; X64_1LD: res_block:
; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
; X64_1LD: loadbb:
-; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1
-; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1
-; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
-; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; X64_1LD-NEXT: [[TMP1:%.*]] = load i64, ptr [[X:%.*]], align 1
+; X64_1LD-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y:%.*]], align 1
+; X64_1LD-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]]
+; X64_1LD-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
-; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 5
-; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 5
-; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP6]], align 1
-; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP7]], align 1
-; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]]
-; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; X64_1LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 5
+; X64_1LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 5
+; X64_1LD-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64_1LD-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64_1LD-NEXT: [[TMP8:%.*]] = icmp ne i64 [[TMP6]], [[TMP7]]
+; X64_1LD-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; X64_1LD: endblock:
; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
@@ -955,18 +955,18 @@ define i32 @cmp_eq13(ptr nocapture readonly %x, ptr nocapture readonly %y) {
; X64_1LD-NEXT: ret i32 [[CONV]]
;
; X64_2LD-LABEL: @cmp_eq13(
-; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1
-; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1
-; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
-; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 5
-; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 5
-; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP6]], align 1
-; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP7]], align 1
-; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]
-; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]
-; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
-; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
-; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
+; X64_2LD-NEXT: [[TMP1:%.*]] = load i64, ptr [[X:%.*]], align 1
+; X64_2LD-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y:%.*]], align 1
+; X64_2LD-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64_2LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 5
+; X64_2LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 5
+; X64_2LD-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64_2LD-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64_2LD-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64_2LD-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64_2LD-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64_2LD-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X64_2LD-NEXT: ret i32 [[CONV]]
;
@@ -982,17 +982,17 @@ define i32 @cmp_eq14(ptr nocapture readonly %x, ptr nocapture readonly %y) {
; X64_1LD: res_block:
; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
; X64_1LD: loadbb:
-; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1
-; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1
-; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
-; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; X64_1LD-NEXT: [[TMP1:%.*]] = load i64, ptr [[X:%.*]], align 1
+; X64_1LD-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y:%.*]], align 1
+; X64_1LD-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]]
+; X64_1LD-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
-; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 6
-; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 6
-; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP6]], align 1
-; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP7]], align 1
-; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]]
-; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; X64_1LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 6
+; X64_1LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 6
+; X64_1LD-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64_1LD-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64_1LD-NEXT: [[TMP8:%.*]] = icmp ne i64 [[TMP6]], [[TMP7]]
+; X64_1LD-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; X64_1LD: endblock:
; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
@@ -1000,18 +1000,18 @@ define i32 @cmp_eq14(ptr nocapture readonly %x, ptr nocapture readonly %y) {
; X64_1LD-NEXT: ret i32 [[CONV]]
;
; X64_2LD-LABEL: @cmp_eq14(
-; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1
-; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1
-; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
-; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 6
-; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 6
-; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP6]], align 1
-; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP7]], align 1
-; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]
-; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]
-; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
-; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
-; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
+; X64_2LD-NEXT: [[TMP1:%.*]] = load i64, ptr [[X:%.*]], align 1
+; X64_2LD-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y:%.*]], align 1
+; X64_2LD-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64_2LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 6
+; X64_2LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 6
+; X64_2LD-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64_2LD-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64_2LD-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64_2LD-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64_2LD-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64_2LD-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X64_2LD-NEXT: ret i32 [[CONV]]
;
@@ -1027,17 +1027,17 @@ define i32 @cmp_eq15(ptr nocapture readonly %x, ptr nocapture readonly %y) {
; X64_1LD: res_block:
; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
; X64_1LD: loadbb:
-; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1
-; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1
-; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
-; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; X64_1LD-NEXT: [[TMP1:%.*]] = load i64, ptr [[X:%.*]], align 1
+; X64_1LD-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y:%.*]], align 1
+; X64_1LD-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]]
+; X64_1LD-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
-; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 7
-; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 7
-; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP6]], align 1
-; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP7]], align 1
-; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]]
-; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; X64_1LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64_1LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 7
+; X64_1LD-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64_1LD-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64_1LD-NEXT: [[TMP8:%.*]] = icmp ne i64 [[TMP6]], [[TMP7]]
+; X64_1LD-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; X64_1LD: endblock:
; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
@@ -1045,18 +1045,18 @@ define i32 @cmp_eq15(ptr nocapture readonly %x, ptr nocapture readonly %y) {
; X64_1LD-NEXT: ret i32 [[CONV]]
;
; X64_2LD-LABEL: @cmp_eq15(
-; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1
-; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1
-; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
-; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 7
-; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 7
-; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP6]], align 1
-; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP7]], align 1
-; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]
-; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]
-; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
-; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
-; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
+; X64_2LD-NEXT: [[TMP1:%.*]] = load i64, ptr [[X:%.*]], align 1
+; X64_2LD-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y:%.*]], align 1
+; X64_2LD-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+; X64_2LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 7
+; X64_2LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 7
+; X64_2LD-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1
+; X64_2LD-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1
+; X64_2LD-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]]
+; X64_2LD-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]]
+; X64_2LD-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
+; X64_2LD-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
+; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0
; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X64_2LD-NEXT: ret i32 [[CONV]]
;
@@ -1068,11 +1068,11 @@ define i32 @cmp_eq15(ptr nocapture readonly %x, ptr nocapture readonly %y) {
define i32 @cmp_eq16(ptr nocapture readonly %x, ptr nocapture readonly %y) {
; X64-LABEL: @cmp_eq16(
-; X64-NEXT: [[TMP3:%.*]] = load i128, ptr [[X:%.*]], align 1
-; X64-NEXT: [[TMP4:%.*]] = load i128, ptr [[Y:%.*]], align 1
-; X64-NEXT: [[TMP5:%.*]] = icmp ne i128 [[TMP3]], [[TMP4]]
-; X64-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32
-; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0
+; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X:%.*]], align 1
+; X64-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y:%.*]], align 1
+; X64-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]]
+; X64-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0
; X64-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X64-NEXT: ret i32 [[CONV]]
;
More information about the llvm-commits mailing list