[llvm] 7aecf23 - [ExpandMemCmp] Correctly set alignment of generated loads
Juneyoung Lee via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 16 06:39:57 PDT 2020
Author: Juneyoung Lee
Date: 2020-03-16T22:39:48+09:00
New Revision: 7aecf2323c4ef007ed443d9a58703fe08815b805
URL: https://github.com/llvm/llvm-project/commit/7aecf2323c4ef007ed443d9a58703fe08815b805
DIFF: https://github.com/llvm/llvm-project/commit/7aecf2323c4ef007ed443d9a58703fe08815b805.diff
LOG: [ExpandMemCmp] Correctly set alignment of generated loads
Summary:
This is a part of the series of efforts for correcting alignment of memory operations.
(Other related bugs: https://bugs.llvm.org/show_bug.cgi?id=44388 , https://bugs.llvm.org/show_bug.cgi?id=44543 )
This fixes https://bugs.llvm.org/show_bug.cgi?id=43880 by giving generated loads a default alignment of 1.
The test CodeGen/AArch64/bcmp-inline-small.ll needed to be changed; it was introduced by https://reviews.llvm.org/D64805 . I talked with @evandro, and he confirmed that the test is okay to change.
Two other tests from PowerPC needed changes as well, but the fixes were straightforward.
Reviewers: courbet
Reviewed By: courbet
Subscribers: nlopes, gchatelet, wuzish, nemanjai, kristof.beyls, hiraditya, steven.zhang, danielkiss, llvm-commits, evandro
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D76113
Added:
Modified:
llvm/lib/CodeGen/ExpandMemCmp.cpp
llvm/test/CodeGen/AArch64/bcmp-inline-small.ll
llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll
llvm/test/CodeGen/PowerPC/memcmp-mergeexpand.ll
llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/ExpandMemCmp.cpp b/llvm/lib/CodeGen/ExpandMemCmp.cpp
index 213416d08610..7cf3f0863e5b 100644
--- a/llvm/lib/CodeGen/ExpandMemCmp.cpp
+++ b/llvm/lib/CodeGen/ExpandMemCmp.cpp
@@ -273,6 +273,8 @@ MemCmpExpansion::LoadPair MemCmpExpansion::getLoadPair(Type *LoadSizeType,
// Get the memory source at offset `OffsetBytes`.
Value *LhsSource = CI->getArgOperand(0);
Value *RhsSource = CI->getArgOperand(1);
+ Align LhsAlign = LhsSource->getPointerAlignment(DL).valueOrOne();
+ Align RhsAlign = RhsSource->getPointerAlignment(DL).valueOrOne();
if (OffsetBytes > 0) {
auto *ByteType = Type::getInt8Ty(CI->getContext());
LhsSource = Builder.CreateConstGEP1_64(
@@ -281,6 +283,8 @@ MemCmpExpansion::LoadPair MemCmpExpansion::getLoadPair(Type *LoadSizeType,
RhsSource = Builder.CreateConstGEP1_64(
ByteType, Builder.CreateBitCast(RhsSource, ByteType->getPointerTo()),
OffsetBytes);
+ LhsAlign = commonAlignment(LhsAlign, OffsetBytes);
+ RhsAlign = commonAlignment(RhsAlign, OffsetBytes);
}
LhsSource = Builder.CreateBitCast(LhsSource, LoadSizeType->getPointerTo());
RhsSource = Builder.CreateBitCast(RhsSource, LoadSizeType->getPointerTo());
@@ -290,13 +294,13 @@ MemCmpExpansion::LoadPair MemCmpExpansion::getLoadPair(Type *LoadSizeType,
if (auto *C = dyn_cast<Constant>(LhsSource))
Lhs = ConstantFoldLoadFromConstPtr(C, LoadSizeType, DL);
if (!Lhs)
- Lhs = Builder.CreateLoad(LoadSizeType, LhsSource);
+ Lhs = Builder.CreateAlignedLoad(LoadSizeType, LhsSource, LhsAlign);
Value *Rhs = nullptr;
if (auto *C = dyn_cast<Constant>(RhsSource))
Rhs = ConstantFoldLoadFromConstPtr(C, LoadSizeType, DL);
if (!Rhs)
- Rhs = Builder.CreateLoad(LoadSizeType, RhsSource);
+ Rhs = Builder.CreateAlignedLoad(LoadSizeType, RhsSource, RhsAlign);
// Swap bytes if required.
if (NeedsBSwap) {
diff --git a/llvm/test/CodeGen/AArch64/bcmp-inline-small.ll b/llvm/test/CodeGen/AArch64/bcmp-inline-small.ll
index da42b1d6863c..a7d08565c4c4 100644
--- a/llvm/test/CodeGen/AArch64/bcmp-inline-small.ll
+++ b/llvm/test/CodeGen/AArch64/bcmp-inline-small.ll
@@ -4,13 +4,28 @@
declare i32 @bcmp(i8*, i8*, i64) nounwind readonly
declare i32 @memcmp(i8*, i8*, i64) nounwind readonly
-define i1 @bcmp_b2(i8* %s1, i8* %s2) {
+define i1 @test_b2(i8* %s1, i8* %s2) {
entry:
%bcmp = call i32 @bcmp(i8* %s1, i8* %s2, i64 15)
%ret = icmp eq i32 %bcmp, 0
ret i1 %ret
-; CHECK-LABEL: bcmp_b2:
+; CHECK-LABEL: test_b2:
+; CHECK-NOT: bl bcmp
+; CHECKN: ldr x
+; CHECKN-NEXT: ldr x
+; CHECKN-NEXT: ldur x
+; CHECKN-NEXT: ldur x
+; CHECKS-COUNT-30: ldrb w
+}
+
+define i1 @test_b2_align8(i8* align 8 %s1, i8* align 8 %s2) {
+entry:
+ %bcmp = call i32 @bcmp(i8* %s1, i8* %s2, i64 15)
+ %ret = icmp eq i32 %bcmp, 0
+ ret i1 %ret
+
+; CHECK-LABEL: test_b2_align8:
; CHECK-NOT: bl bcmp
; CHECKN: ldr x
; CHECKN-NEXT: ldr x
@@ -20,19 +35,19 @@ entry:
; CHECKS-NEXT: ldr x
; CHECKS-NEXT: ldr w
; CHECKS-NEXT: ldr w
-; CHECKS-NEXT: ldrh w
-; CHECKS-NEXT: ldrh w
-; CHECKS-NEXT: ldrb w
-; CHECKS-NEXT: ldrb w
+; CHECKS-NEXT: ldrh w
+; CHECKS-NEXT: ldrh w
+; CHECKS-NEXT: ldrb w
+; CHECKS-NEXT: ldrb w
}
-define i1 @bcmp_bs(i8* %s1, i8* %s2) optsize {
+define i1 @test_bs(i8* %s1, i8* %s2) optsize {
entry:
%memcmp = call i32 @memcmp(i8* %s1, i8* %s2, i64 31)
%ret = icmp eq i32 %memcmp, 0
ret i1 %ret
-; CHECK-LABEL: bcmp_bs:
+; CHECK-LABEL: test_bs:
; CHECKN-NOT: bl memcmp
; CHECKN: ldp x
; CHECKN-NEXT: ldp x
diff --git a/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll b/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll
index f6efcdd7d852..ce2f93871359 100644
--- a/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll
+++ b/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll
@@ -35,8 +35,8 @@ define signext i32 @zeroEqualityTest02(i8* %x, i8* %y) {
define signext i32 @zeroEqualityTest01(i8* %x, i8* %y) {
; CHECK-LABEL: zeroEqualityTest01:
; CHECK: # %bb.0:
-; CHECK-NEXT: ld 5, 0(3)
-; CHECK-NEXT: ld 6, 0(4)
+; CHECK-NEXT: ldx 5, 0, 3
+; CHECK-NEXT: ldx 6, 0, 4
; CHECK-NEXT: cmpld 5, 6
; CHECK-NEXT: bne 0, .LBB1_2
; CHECK-NEXT: # %bb.1: # %loadbb1
@@ -125,7 +125,7 @@ define signext i32 @equalityFoldTwoConstants() {
define signext i32 @equalityFoldOneConstant(i8* %X) {
; CHECK-LABEL: equalityFoldOneConstant:
; CHECK: # %bb.0:
-; CHECK-NEXT: ld 4, 0(3)
+; CHECK-NEXT: ldx 4, 0, 3
; CHECK-NEXT: li 5, 1
; CHECK-NEXT: sldi 5, 5, 32
; CHECK-NEXT: cmpld 4, 5
diff --git a/llvm/test/CodeGen/PowerPC/memcmp-mergeexpand.ll b/llvm/test/CodeGen/PowerPC/memcmp-mergeexpand.ll
index 298ce90b74ee..181683e7a7f4 100644
--- a/llvm/test/CodeGen/PowerPC/memcmp-mergeexpand.ll
+++ b/llvm/test/CodeGen/PowerPC/memcmp-mergeexpand.ll
@@ -8,8 +8,8 @@
define zeroext i1 @opeq1(
; PPC64LE-LABEL: opeq1:
; PPC64LE: # %bb.0: # %"entry+land.rhs.i"
-; PPC64LE-NEXT: ld 3, 0(3)
-; PPC64LE-NEXT: ld 4, 0(4)
+; PPC64LE-NEXT: ldx 3, 0, 3
+; PPC64LE-NEXT: ldx 4, 0, 4
; PPC64LE-NEXT: xor 3, 3, 4
; PPC64LE-NEXT: cntlzd 3, 3
; PPC64LE-NEXT: rldicl 3, 3, 58, 63
diff --git a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll
index dd3bb3973754..b8cfe04d43cb 100644
--- a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll
+++ b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll
@@ -9,8 +9,8 @@ define i32 @cmp2(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; ALL-LABEL: @cmp2(
; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i16*
; ALL-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i16*
-; ALL-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]]
-; ALL-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]]
+; ALL-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]], align 1
+; ALL-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]], align 1
; ALL-NEXT: [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
; ALL-NEXT: [[TMP6:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
; ALL-NEXT: [[TMP7:%.*]] = zext i16 [[TMP5]] to i32
@@ -26,8 +26,8 @@ define i32 @cmp2_align2(i8* nocapture readonly align 2 %x, i8* nocapture readonl
; ALL-LABEL: @cmp2_align2(
; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i16*
; ALL-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i16*
-; ALL-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]]
-; ALL-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]]
+; ALL-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]], align 2
+; ALL-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]], align 2
; ALL-NEXT: [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
; ALL-NEXT: [[TMP6:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
; ALL-NEXT: [[TMP7:%.*]] = zext i16 [[TMP5]] to i32
@@ -49,8 +49,8 @@ define i32 @cmp3(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; ALL: loadbb:
; ALL-NEXT: [[TMP3:%.*]] = bitcast i8* [[X:%.*]] to i16*
; ALL-NEXT: [[TMP4:%.*]] = bitcast i8* [[Y:%.*]] to i16*
-; ALL-NEXT: [[TMP5:%.*]] = load i16, i16* [[TMP3]]
-; ALL-NEXT: [[TMP6:%.*]] = load i16, i16* [[TMP4]]
+; ALL-NEXT: [[TMP5:%.*]] = load i16, i16* [[TMP3]], align 1
+; ALL-NEXT: [[TMP6:%.*]] = load i16, i16* [[TMP4]], align 1
; ALL-NEXT: [[TMP7]] = call i16 @llvm.bswap.i16(i16 [[TMP5]])
; ALL-NEXT: [[TMP8]] = call i16 @llvm.bswap.i16(i16 [[TMP6]])
; ALL-NEXT: [[TMP9:%.*]] = icmp eq i16 [[TMP7]], [[TMP8]]
@@ -58,8 +58,8 @@ define i32 @cmp3(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; ALL: loadbb1:
; ALL-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i64 2
; ALL-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[Y]], i64 2
-; ALL-NEXT: [[TMP12:%.*]] = load i8, i8* [[TMP10]]
-; ALL-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP11]]
+; ALL-NEXT: [[TMP12:%.*]] = load i8, i8* [[TMP10]], align 1
+; ALL-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP11]], align 1
; ALL-NEXT: [[TMP14:%.*]] = zext i8 [[TMP12]] to i32
; ALL-NEXT: [[TMP15:%.*]] = zext i8 [[TMP13]] to i32
; ALL-NEXT: [[TMP16:%.*]] = sub i32 [[TMP14]], [[TMP15]]
@@ -76,8 +76,8 @@ define i32 @cmp4(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; ALL-LABEL: @cmp4(
; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
; ALL-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; ALL-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; ALL-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
+; ALL-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 1
+; ALL-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 1
; ALL-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
; ALL-NEXT: [[TMP6:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
; ALL-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]]
@@ -101,8 +101,8 @@ define i32 @cmp5(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; ALL: loadbb:
; ALL-NEXT: [[TMP3:%.*]] = bitcast i8* [[X:%.*]] to i32*
; ALL-NEXT: [[TMP4:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; ALL-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP3]]
-; ALL-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]]
+; ALL-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP3]], align 1
+; ALL-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 1
; ALL-NEXT: [[TMP7]] = call i32 @llvm.bswap.i32(i32 [[TMP5]])
; ALL-NEXT: [[TMP8]] = call i32 @llvm.bswap.i32(i32 [[TMP6]])
; ALL-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]]
@@ -110,8 +110,8 @@ define i32 @cmp5(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; ALL: loadbb1:
; ALL-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i64 4
; ALL-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[Y]], i64 4
-; ALL-NEXT: [[TMP12:%.*]] = load i8, i8* [[TMP10]]
-; ALL-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP11]]
+; ALL-NEXT: [[TMP12:%.*]] = load i8, i8* [[TMP10]], align 1
+; ALL-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP11]], align 1
; ALL-NEXT: [[TMP14:%.*]] = zext i8 [[TMP12]] to i32
; ALL-NEXT: [[TMP15:%.*]] = zext i8 [[TMP13]] to i32
; ALL-NEXT: [[TMP16:%.*]] = sub i32 [[TMP14]], [[TMP15]]
@@ -136,8 +136,8 @@ define i32 @cmp6(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; ALL: loadbb:
; ALL-NEXT: [[TMP3:%.*]] = bitcast i8* [[X:%.*]] to i32*
; ALL-NEXT: [[TMP4:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; ALL-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP3]]
-; ALL-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]]
+; ALL-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP3]], align 1
+; ALL-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 1
; ALL-NEXT: [[TMP7]] = call i32 @llvm.bswap.i32(i32 [[TMP5]])
; ALL-NEXT: [[TMP8]] = call i32 @llvm.bswap.i32(i32 [[TMP6]])
; ALL-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]]
@@ -147,8 +147,8 @@ define i32 @cmp6(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; ALL-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[Y]], i64 4
; ALL-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to i16*
; ALL-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP11]] to i16*
-; ALL-NEXT: [[TMP14:%.*]] = load i16, i16* [[TMP12]]
-; ALL-NEXT: [[TMP15:%.*]] = load i16, i16* [[TMP13]]
+; ALL-NEXT: [[TMP14:%.*]] = load i16, i16* [[TMP12]], align 1
+; ALL-NEXT: [[TMP15:%.*]] = load i16, i16* [[TMP13]], align 1
; ALL-NEXT: [[TMP16:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP14]])
; ALL-NEXT: [[TMP17:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP15]])
; ALL-NEXT: [[TMP18]] = zext i16 [[TMP16]] to i32
@@ -184,8 +184,8 @@ define i32 @cmp8(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X32: loadbb:
; X32-NEXT: [[TMP3:%.*]] = bitcast i8* [[X:%.*]] to i32*
; X32-NEXT: [[TMP4:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP3]]
-; X32-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]]
+; X32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP3]], align 1
+; X32-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 1
; X32-NEXT: [[TMP7]] = call i32 @llvm.bswap.i32(i32 [[TMP5]])
; X32-NEXT: [[TMP8]] = call i32 @llvm.bswap.i32(i32 [[TMP6]])
; X32-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]]
@@ -195,8 +195,8 @@ define i32 @cmp8(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X32-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[Y]], i64 4
; X32-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to i32*
; X32-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP11]] to i32*
-; X32-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP12]]
-; X32-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP13]]
+; X32-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP12]], align 1
+; X32-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP13]], align 1
; X32-NEXT: [[TMP16]] = call i32 @llvm.bswap.i32(i32 [[TMP14]])
; X32-NEXT: [[TMP17]] = call i32 @llvm.bswap.i32(i32 [[TMP15]])
; X32-NEXT: [[TMP18:%.*]] = icmp eq i32 [[TMP16]], [[TMP17]]
@@ -208,8 +208,8 @@ define i32 @cmp8(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64-LABEL: @cmp8(
; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
; X64-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
; X64-NEXT: [[TMP5:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
; X64-NEXT: [[TMP6:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
; X64-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP5]], [[TMP6]]
@@ -237,8 +237,8 @@ define i32 @cmp9(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64: loadbb:
; X64-NEXT: [[TMP3:%.*]] = bitcast i8* [[X:%.*]] to i64*
; X64-NEXT: [[TMP4:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP3]]
-; X64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]]
+; X64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP3]], align 1
+; X64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 1
; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]])
; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]])
; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]]
@@ -246,8 +246,8 @@ define i32 @cmp9(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64: loadbb1:
; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i64 8
; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[Y]], i64 8
-; X64-NEXT: [[TMP12:%.*]] = load i8, i8* [[TMP10]]
-; X64-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP11]]
+; X64-NEXT: [[TMP12:%.*]] = load i8, i8* [[TMP10]], align 1
+; X64-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP11]], align 1
; X64-NEXT: [[TMP14:%.*]] = zext i8 [[TMP12]] to i32
; X64-NEXT: [[TMP15:%.*]] = zext i8 [[TMP13]] to i32
; X64-NEXT: [[TMP16:%.*]] = sub i32 [[TMP14]], [[TMP15]]
@@ -276,8 +276,8 @@ define i32 @cmp10(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64: loadbb:
; X64-NEXT: [[TMP3:%.*]] = bitcast i8* [[X:%.*]] to i64*
; X64-NEXT: [[TMP4:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP3]]
-; X64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]]
+; X64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP3]], align 1
+; X64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 1
; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]])
; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]])
; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]]
@@ -287,8 +287,8 @@ define i32 @cmp10(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[Y]], i64 8
; X64-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to i16*
; X64-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP11]] to i16*
-; X64-NEXT: [[TMP14:%.*]] = load i16, i16* [[TMP12]]
-; X64-NEXT: [[TMP15:%.*]] = load i16, i16* [[TMP13]]
+; X64-NEXT: [[TMP14:%.*]] = load i16, i16* [[TMP12]], align 1
+; X64-NEXT: [[TMP15:%.*]] = load i16, i16* [[TMP13]], align 1
; X64-NEXT: [[TMP16:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP14]])
; X64-NEXT: [[TMP17:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP15]])
; X64-NEXT: [[TMP18]] = zext i16 [[TMP16]] to i64
@@ -328,8 +328,8 @@ define i32 @cmp12(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64: loadbb:
; X64-NEXT: [[TMP3:%.*]] = bitcast i8* [[X:%.*]] to i64*
; X64-NEXT: [[TMP4:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP3]]
-; X64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]]
+; X64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP3]], align 1
+; X64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 1
; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]])
; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]])
; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]]
@@ -339,8 +339,8 @@ define i32 @cmp12(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[Y]], i64 8
; X64-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to i32*
; X64-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP11]] to i32*
-; X64-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP12]]
-; X64-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP13]]
+; X64-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP12]], align 1
+; X64-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP13]], align 1
; X64-NEXT: [[TMP16:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP14]])
; X64-NEXT: [[TMP17:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP15]])
; X64-NEXT: [[TMP18]] = zext i32 [[TMP16]] to i64
@@ -398,8 +398,8 @@ define i32 @cmp16(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64: loadbb:
; X64-NEXT: [[TMP3:%.*]] = bitcast i8* [[X:%.*]] to i64*
; X64-NEXT: [[TMP4:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP3]]
-; X64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]]
+; X64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP3]], align 1
+; X64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 1
; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]])
; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]])
; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]]
@@ -409,8 +409,8 @@ define i32 @cmp16(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[Y]], i64 8
; X64-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to i64*
; X64-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP11]] to i64*
-; X64-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP12]]
-; X64-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP13]]
+; X64-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP12]], align 1
+; X64-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP13]], align 1
; X64-NEXT: [[TMP16]] = call i64 @llvm.bswap.i64(i64 [[TMP14]])
; X64-NEXT: [[TMP17]] = call i64 @llvm.bswap.i64(i64 [[TMP15]])
; X64-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP16]], [[TMP17]]
@@ -427,8 +427,8 @@ define i32 @cmp_eq2(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; ALL-LABEL: @cmp_eq2(
; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i16*
; ALL-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i16*
-; ALL-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]]
-; ALL-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]]
+; ALL-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]], align 1
+; ALL-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]], align 1
; ALL-NEXT: [[TMP5:%.*]] = icmp ne i16 [[TMP3]], [[TMP4]]
; ALL-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32
; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0
@@ -445,13 +445,13 @@ define i32 @cmp_eq3(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X32-LABEL: @cmp_eq3(
; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i16*
; X32-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i16*
-; X32-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]]
-; X32-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]]
+; X32-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]], align 1
+; X32-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]], align 1
; X32-NEXT: [[TMP5:%.*]] = xor i16 [[TMP3]], [[TMP4]]
; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 2
; X32-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 2
-; X32-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
-; X32-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
+; X32-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]], align 1
+; X32-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]], align 1
; X32-NEXT: [[TMP10:%.*]] = zext i8 [[TMP8]] to i16
; X32-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i16
; X32-NEXT: [[TMP12:%.*]] = xor i16 [[TMP10]], [[TMP11]]
@@ -469,15 +469,15 @@ define i32 @cmp_eq3(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_1LD: loadbb:
; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i16*
; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i16*
-; X64_1LD-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]]
-; X64_1LD-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]]
+; X64_1LD-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]], align 1
+; X64_1LD-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]], align 1
; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i16 [[TMP3]], [[TMP4]]
; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 2
; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 2
-; X64_1LD-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
-; X64_1LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
+; X64_1LD-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]], align 1
+; X64_1LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]], align 1
; X64_1LD-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP8]], [[TMP9]]
; X64_1LD-NEXT: br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; X64_1LD: endblock:
@@ -489,13 +489,13 @@ define i32 @cmp_eq3(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_2LD-LABEL: @cmp_eq3(
; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i16*
; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i16*
-; X64_2LD-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]]
-; X64_2LD-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]]
+; X64_2LD-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]], align 1
+; X64_2LD-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]], align 1
; X64_2LD-NEXT: [[TMP5:%.*]] = xor i16 [[TMP3]], [[TMP4]]
; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 2
; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 2
-; X64_2LD-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
-; X64_2LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
+; X64_2LD-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]], align 1
+; X64_2LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]], align 1
; X64_2LD-NEXT: [[TMP10:%.*]] = zext i8 [[TMP8]] to i16
; X64_2LD-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i16
; X64_2LD-NEXT: [[TMP12:%.*]] = xor i16 [[TMP10]], [[TMP11]]
@@ -516,8 +516,8 @@ define i32 @cmp_eq4(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; ALL-LABEL: @cmp_eq4(
; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
; ALL-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; ALL-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; ALL-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
+; ALL-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 1
+; ALL-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 1
; ALL-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
; ALL-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32
; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0
@@ -534,13 +534,13 @@ define i32 @cmp_eq5(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X32-LABEL: @cmp_eq5(
; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
; X32-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
+; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 1
+; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 1
; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 4
; X32-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 4
-; X32-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
-; X32-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
+; X32-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]], align 1
+; X32-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]], align 1
; X32-NEXT: [[TMP10:%.*]] = zext i8 [[TMP8]] to i32
; X32-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i32
; X32-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]
@@ -558,15 +558,15 @@ define i32 @cmp_eq5(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_1LD: loadbb:
; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X64_1LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; X64_1LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
+; X64_1LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 1
+; X64_1LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 1
; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 4
; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 4
-; X64_1LD-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
-; X64_1LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
+; X64_1LD-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]], align 1
+; X64_1LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]], align 1
; X64_1LD-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP8]], [[TMP9]]
; X64_1LD-NEXT: br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; X64_1LD: endblock:
@@ -578,13 +578,13 @@ define i32 @cmp_eq5(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_2LD-LABEL: @cmp_eq5(
; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X64_2LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
+; X64_2LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 1
+; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 1
; X64_2LD-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 4
; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 4
-; X64_2LD-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
-; X64_2LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
+; X64_2LD-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]], align 1
+; X64_2LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]], align 1
; X64_2LD-NEXT: [[TMP10:%.*]] = zext i8 [[TMP8]] to i32
; X64_2LD-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i32
; X64_2LD-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]
@@ -605,15 +605,15 @@ define i32 @cmp_eq6(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X32-LABEL: @cmp_eq6(
; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
; X32-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
+; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 1
+; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 1
; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 4
; X32-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 4
; X32-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i16*
; X32-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i16*
-; X32-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]]
-; X32-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
+; X32-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]], align 1
+; X32-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]], align 1
; X32-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i32
; X32-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i32
; X32-NEXT: [[TMP14:%.*]] = xor i32 [[TMP12]], [[TMP13]]
@@ -631,8 +631,8 @@ define i32 @cmp_eq6(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_1LD: loadbb:
; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X64_1LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; X64_1LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
+; X64_1LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 1
+; X64_1LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 1
; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
@@ -640,8 +640,8 @@ define i32 @cmp_eq6(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 4
; X64_1LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i16*
; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i16*
-; X64_1LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]]
-; X64_1LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
+; X64_1LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]], align 1
+; X64_1LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]], align 1
; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP10]], [[TMP11]]
; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; X64_1LD: endblock:
@@ -653,15 +653,15 @@ define i32 @cmp_eq6(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_2LD-LABEL: @cmp_eq6(
; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X64_2LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
+; X64_2LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 1
+; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 1
; X64_2LD-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 4
; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 4
; X64_2LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i16*
; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i16*
-; X64_2LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]]
-; X64_2LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
+; X64_2LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]], align 1
+; X64_2LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]], align 1
; X64_2LD-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i32
; X64_2LD-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i32
; X64_2LD-NEXT: [[TMP14:%.*]] = xor i32 [[TMP12]], [[TMP13]]
@@ -682,15 +682,15 @@ define i32 @cmp_eq6_align4(i8* nocapture readonly align 4 %x, i8* nocapture read
; X32-LABEL: @cmp_eq6_align4(
; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
; X32-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
+; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 4
+; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 4
; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 4
; X32-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 4
; X32-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i16*
; X32-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i16*
-; X32-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]]
-; X32-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
+; X32-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]], align 4
+; X32-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]], align 4
; X32-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i32
; X32-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i32
; X32-NEXT: [[TMP14:%.*]] = xor i32 [[TMP12]], [[TMP13]]
@@ -708,8 +708,8 @@ define i32 @cmp_eq6_align4(i8* nocapture readonly align 4 %x, i8* nocapture read
; X64_1LD: loadbb:
; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X64_1LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; X64_1LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
+; X64_1LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 4
+; X64_1LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 4
; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
@@ -717,8 +717,8 @@ define i32 @cmp_eq6_align4(i8* nocapture readonly align 4 %x, i8* nocapture read
; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 4
; X64_1LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i16*
; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i16*
-; X64_1LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]]
-; X64_1LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
+; X64_1LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]], align 4
+; X64_1LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]], align 4
; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP10]], [[TMP11]]
; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; X64_1LD: endblock:
@@ -730,15 +730,15 @@ define i32 @cmp_eq6_align4(i8* nocapture readonly align 4 %x, i8* nocapture read
; X64_2LD-LABEL: @cmp_eq6_align4(
; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X64_2LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
+; X64_2LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 4
+; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 4
; X64_2LD-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 4
; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 4
; X64_2LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i16*
; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i16*
-; X64_2LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]]
-; X64_2LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
+; X64_2LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]], align 4
+; X64_2LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]], align 4
; X64_2LD-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i32
; X64_2LD-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i32
; X64_2LD-NEXT: [[TMP14:%.*]] = xor i32 [[TMP12]], [[TMP13]]
@@ -759,15 +759,15 @@ define i32 @cmp_eq7(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X32-LABEL: @cmp_eq7(
; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
; X32-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
+; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 1
+; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 1
; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 3
; X32-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 3
; X32-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i32*
; X32-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i32*
-; X32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]]
-; X32-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]]
+; X32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]], align 1
+; X32-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]], align 1
; X32-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]
; X32-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]
; X32-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
@@ -783,8 +783,8 @@ define i32 @cmp_eq7(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_1LD: loadbb:
; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X64_1LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; X64_1LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
+; X64_1LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 1
+; X64_1LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 1
; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
@@ -792,8 +792,8 @@ define i32 @cmp_eq7(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 3
; X64_1LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i32*
; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i32*
-; X64_1LD-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]]
-; X64_1LD-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]]
+; X64_1LD-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]], align 1
+; X64_1LD-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]], align 1
; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP10]], [[TMP11]]
; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; X64_1LD: endblock:
@@ -805,15 +805,15 @@ define i32 @cmp_eq7(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_2LD-LABEL: @cmp_eq7(
; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X64_2LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
+; X64_2LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 1
+; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 1
; X64_2LD-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 3
; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 3
; X64_2LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i32*
; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i32*
-; X64_2LD-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]]
-; X64_2LD-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]]
+; X64_2LD-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]], align 1
+; X64_2LD-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]], align 1
; X64_2LD-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]
; X64_2LD-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]
; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
@@ -832,15 +832,15 @@ define i32 @cmp_eq8(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X32-LABEL: @cmp_eq8(
; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
; X32-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
+; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 1
+; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 1
; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 4
; X32-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 4
; X32-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i32*
; X32-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i32*
-; X32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]]
-; X32-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]]
+; X32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]], align 1
+; X32-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]], align 1
; X32-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]
; X32-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]
; X32-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
@@ -852,8 +852,8 @@ define i32 @cmp_eq8(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64-LABEL: @cmp_eq8(
; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
; X64-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
; X64-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
; X64-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32
; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0
@@ -880,15 +880,15 @@ define i32 @cmp_eq9(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_1LD: loadbb:
; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
+; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 8
; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 8
-; X64_1LD-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
-; X64_1LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
+; X64_1LD-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]], align 1
+; X64_1LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]], align 1
; X64_1LD-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP8]], [[TMP9]]
; X64_1LD-NEXT: br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; X64_1LD: endblock:
@@ -900,13 +900,13 @@ define i32 @cmp_eq9(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_2LD-LABEL: @cmp_eq9(
; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
+; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 8
; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 8
-; X64_2LD-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
-; X64_2LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
+; X64_2LD-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]], align 1
+; X64_2LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]], align 1
; X64_2LD-NEXT: [[TMP10:%.*]] = zext i8 [[TMP8]] to i64
; X64_2LD-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i64
; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]
@@ -937,8 +937,8 @@ define i32 @cmp_eq10(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_1LD: loadbb:
; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
+; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
@@ -946,8 +946,8 @@ define i32 @cmp_eq10(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 8
; X64_1LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i16*
; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i16*
-; X64_1LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]]
-; X64_1LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
+; X64_1LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]], align 1
+; X64_1LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]], align 1
; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP10]], [[TMP11]]
; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; X64_1LD: endblock:
@@ -959,15 +959,15 @@ define i32 @cmp_eq10(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_2LD-LABEL: @cmp_eq10(
; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
+; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 8
; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 8
; X64_2LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i16*
; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i16*
-; X64_2LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]]
-; X64_2LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
+; X64_2LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]], align 1
+; X64_2LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]], align 1
; X64_2LD-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i64
; X64_2LD-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i64
; X64_2LD-NEXT: [[TMP14:%.*]] = xor i64 [[TMP12]], [[TMP13]]
@@ -998,8 +998,8 @@ define i32 @cmp_eq11(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_1LD: loadbb:
; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
+; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
@@ -1007,8 +1007,8 @@ define i32 @cmp_eq11(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 3
; X64_1LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i64*
; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i64*
-; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP8]]
-; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]]
+; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP8]], align 1
+; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]], align 1
; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]]
; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; X64_1LD: endblock:
@@ -1020,15 +1020,15 @@ define i32 @cmp_eq11(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_2LD-LABEL: @cmp_eq11(
; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
+; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 3
; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 3
; X64_2LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i64*
; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i64*
-; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP8]]
-; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]]
+; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP8]], align 1
+; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]], align 1
; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]
; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]
; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
@@ -1057,8 +1057,8 @@ define i32 @cmp_eq12(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_1LD: loadbb:
; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
+; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
@@ -1066,8 +1066,8 @@ define i32 @cmp_eq12(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 8
; X64_1LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i32*
; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i32*
-; X64_1LD-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]]
-; X64_1LD-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]]
+; X64_1LD-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]], align 1
+; X64_1LD-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]], align 1
; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP10]], [[TMP11]]
; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; X64_1LD: endblock:
@@ -1079,15 +1079,15 @@ define i32 @cmp_eq12(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_2LD-LABEL: @cmp_eq12(
; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
+; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 8
; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 8
; X64_2LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i32*
; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i32*
-; X64_2LD-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]]
-; X64_2LD-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]]
+; X64_2LD-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]], align 1
+; X64_2LD-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]], align 1
; X64_2LD-NEXT: [[TMP12:%.*]] = zext i32 [[TMP10]] to i64
; X64_2LD-NEXT: [[TMP13:%.*]] = zext i32 [[TMP11]] to i64
; X64_2LD-NEXT: [[TMP14:%.*]] = xor i64 [[TMP12]], [[TMP13]]
@@ -1118,8 +1118,8 @@ define i32 @cmp_eq13(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_1LD: loadbb:
; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
+; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
@@ -1127,8 +1127,8 @@ define i32 @cmp_eq13(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 5
; X64_1LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i64*
; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i64*
-; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP8]]
-; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]]
+; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP8]], align 1
+; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]], align 1
; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]]
; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; X64_1LD: endblock:
@@ -1140,15 +1140,15 @@ define i32 @cmp_eq13(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_2LD-LABEL: @cmp_eq13(
; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
+; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 5
; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 5
; X64_2LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i64*
; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i64*
-; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP8]]
-; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]]
+; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP8]], align 1
+; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]], align 1
; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]
; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]
; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
@@ -1177,8 +1177,8 @@ define i32 @cmp_eq14(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_1LD: loadbb:
; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
+; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
@@ -1186,8 +1186,8 @@ define i32 @cmp_eq14(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 6
; X64_1LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i64*
; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i64*
-; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP8]]
-; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]]
+; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP8]], align 1
+; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]], align 1
; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]]
; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; X64_1LD: endblock:
@@ -1199,15 +1199,15 @@ define i32 @cmp_eq14(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_2LD-LABEL: @cmp_eq14(
; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
+; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 6
; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 6
; X64_2LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i64*
; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i64*
-; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP8]]
-; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]]
+; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP8]], align 1
+; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]], align 1
; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]
; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]
; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
@@ -1236,8 +1236,8 @@ define i32 @cmp_eq15(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_1LD: loadbb:
; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
+; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
@@ -1245,8 +1245,8 @@ define i32 @cmp_eq15(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 7
; X64_1LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i64*
; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i64*
-; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP8]]
-; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]]
+; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP8]], align 1
+; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]], align 1
; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]]
; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; X64_1LD: endblock:
@@ -1258,15 +1258,15 @@ define i32 @cmp_eq15(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_2LD-LABEL: @cmp_eq15(
; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
+; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 7
; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 7
; X64_2LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i64*
; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i64*
-; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP8]]
-; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]]
+; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP8]], align 1
+; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]], align 1
; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]
; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]
; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
@@ -1291,8 +1291,8 @@ define i32 @cmp_eq16(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64-LABEL: @cmp_eq16(
; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i128*
; X64-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i128*
-; X64-NEXT: [[TMP3:%.*]] = load i128, i128* [[TMP1]]
-; X64-NEXT: [[TMP4:%.*]] = load i128, i128* [[TMP2]]
+; X64-NEXT: [[TMP3:%.*]] = load i128, i128* [[TMP1]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i128, i128* [[TMP2]], align 1
; X64-NEXT: [[TMP5:%.*]] = icmp ne i128 [[TMP3]], [[TMP4]]
; X64-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32
; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0
More information about the llvm-commits
mailing list