[llvm] 7aecf23 - [ExpandMemCmp] Correctly set alignment of generated loads

Juneyoung Lee via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 16 06:39:57 PDT 2020


Author: Juneyoung Lee
Date: 2020-03-16T22:39:48+09:00
New Revision: 7aecf2323c4ef007ed443d9a58703fe08815b805

URL: https://github.com/llvm/llvm-project/commit/7aecf2323c4ef007ed443d9a58703fe08815b805
DIFF: https://github.com/llvm/llvm-project/commit/7aecf2323c4ef007ed443d9a58703fe08815b805.diff

LOG: [ExpandMemCmp] Correctly set alignment of generated loads

Summary:
This is a part of the series of efforts for correcting alignment of memory operations.
(Other related bugs: https://bugs.llvm.org/show_bug.cgi?id=44388 , https://bugs.llvm.org/show_bug.cgi?id=44543 )

This fixes https://bugs.llvm.org/show_bug.cgi?id=43880 by setting the default alignment of the generated loads to 1.

The test CodeGen/AArch64/bcmp-inline-small.ll needed to be changed; it was introduced by https://reviews.llvm.org/D64805 . I talked with @evandro, and confirmed that the test is okay to be changed.
Two other PowerPC tests needed changes as well, but the fixes were straightforward.

Reviewers: courbet

Reviewed By: courbet

Subscribers: nlopes, gchatelet, wuzish, nemanjai, kristof.beyls, hiraditya, steven.zhang, danielkiss, llvm-commits, evandro

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D76113

Added: 
    

Modified: 
    llvm/lib/CodeGen/ExpandMemCmp.cpp
    llvm/test/CodeGen/AArch64/bcmp-inline-small.ll
    llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll
    llvm/test/CodeGen/PowerPC/memcmp-mergeexpand.ll
    llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/ExpandMemCmp.cpp b/llvm/lib/CodeGen/ExpandMemCmp.cpp
index 213416d08610..7cf3f0863e5b 100644
--- a/llvm/lib/CodeGen/ExpandMemCmp.cpp
+++ b/llvm/lib/CodeGen/ExpandMemCmp.cpp
@@ -273,6 +273,8 @@ MemCmpExpansion::LoadPair MemCmpExpansion::getLoadPair(Type *LoadSizeType,
   // Get the memory source at offset `OffsetBytes`.
   Value *LhsSource = CI->getArgOperand(0);
   Value *RhsSource = CI->getArgOperand(1);
+  Align LhsAlign = LhsSource->getPointerAlignment(DL).valueOrOne();
+  Align RhsAlign = RhsSource->getPointerAlignment(DL).valueOrOne();
   if (OffsetBytes > 0) {
     auto *ByteType = Type::getInt8Ty(CI->getContext());
     LhsSource = Builder.CreateConstGEP1_64(
@@ -281,6 +283,8 @@ MemCmpExpansion::LoadPair MemCmpExpansion::getLoadPair(Type *LoadSizeType,
     RhsSource = Builder.CreateConstGEP1_64(
         ByteType, Builder.CreateBitCast(RhsSource, ByteType->getPointerTo()),
         OffsetBytes);
+    LhsAlign = commonAlignment(LhsAlign, OffsetBytes);
+    RhsAlign = commonAlignment(RhsAlign, OffsetBytes);
   }
   LhsSource = Builder.CreateBitCast(LhsSource, LoadSizeType->getPointerTo());
   RhsSource = Builder.CreateBitCast(RhsSource, LoadSizeType->getPointerTo());
@@ -290,13 +294,13 @@ MemCmpExpansion::LoadPair MemCmpExpansion::getLoadPair(Type *LoadSizeType,
   if (auto *C = dyn_cast<Constant>(LhsSource))
     Lhs = ConstantFoldLoadFromConstPtr(C, LoadSizeType, DL);
   if (!Lhs)
-    Lhs = Builder.CreateLoad(LoadSizeType, LhsSource);
+    Lhs = Builder.CreateAlignedLoad(LoadSizeType, LhsSource, LhsAlign);
 
   Value *Rhs = nullptr;
   if (auto *C = dyn_cast<Constant>(RhsSource))
     Rhs = ConstantFoldLoadFromConstPtr(C, LoadSizeType, DL);
   if (!Rhs)
-    Rhs = Builder.CreateLoad(LoadSizeType, RhsSource);
+    Rhs = Builder.CreateAlignedLoad(LoadSizeType, RhsSource, RhsAlign);
 
   // Swap bytes if required.
   if (NeedsBSwap) {

diff  --git a/llvm/test/CodeGen/AArch64/bcmp-inline-small.ll b/llvm/test/CodeGen/AArch64/bcmp-inline-small.ll
index da42b1d6863c..a7d08565c4c4 100644
--- a/llvm/test/CodeGen/AArch64/bcmp-inline-small.ll
+++ b/llvm/test/CodeGen/AArch64/bcmp-inline-small.ll
@@ -4,13 +4,28 @@
 declare i32 @bcmp(i8*, i8*, i64) nounwind readonly
 declare i32 @memcmp(i8*, i8*, i64) nounwind readonly
 
-define i1 @bcmp_b2(i8* %s1, i8* %s2) {
+define i1 @test_b2(i8* %s1, i8* %s2) {
 entry:
   %bcmp = call i32 @bcmp(i8* %s1, i8* %s2, i64 15)
   %ret = icmp eq i32 %bcmp, 0
   ret i1 %ret
 
-; CHECK-LABEL: bcmp_b2:
+; CHECK-LABEL: test_b2:
+; CHECK-NOT:   bl bcmp
+; CHECKN:      ldr  x
+; CHECKN-NEXT: ldr  x
+; CHECKN-NEXT: ldur x
+; CHECKN-NEXT: ldur x
+; CHECKS-COUNT-30: ldrb w
+}
+
+define i1 @test_b2_align8(i8* align 8 %s1, i8* align 8 %s2) {
+entry:
+  %bcmp = call i32 @bcmp(i8* %s1, i8* %s2, i64 15)
+  %ret = icmp eq i32 %bcmp, 0
+  ret i1 %ret
+
+; CHECK-LABEL: test_b2_align8:
 ; CHECK-NOT:   bl bcmp
 ; CHECKN:      ldr  x
 ; CHECKN-NEXT: ldr  x
@@ -20,19 +35,19 @@ entry:
 ; CHECKS-NEXT: ldr  x
 ; CHECKS-NEXT: ldr  w
 ; CHECKS-NEXT: ldr  w
-; CHECKS-NEXT: ldrh w
-; CHECKS-NEXT: ldrh w
-; CHECKS-NEXT: ldrb w
-; CHECKS-NEXT: ldrb w
+; CHECKS-NEXT: ldrh  w
+; CHECKS-NEXT: ldrh  w
+; CHECKS-NEXT: ldrb  w
+; CHECKS-NEXT: ldrb  w
 }
 
-define i1 @bcmp_bs(i8* %s1, i8* %s2) optsize {
+define i1 @test_bs(i8* %s1, i8* %s2) optsize {
 entry:
   %memcmp = call i32 @memcmp(i8* %s1, i8* %s2, i64 31)
   %ret = icmp eq i32 %memcmp, 0
   ret i1 %ret
 
-; CHECK-LABEL: bcmp_bs:
+; CHECK-LABEL: test_bs:
 ; CHECKN-NOT:  bl memcmp
 ; CHECKN:      ldp  x
 ; CHECKN-NEXT: ldp  x

diff  --git a/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll b/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll
index f6efcdd7d852..ce2f93871359 100644
--- a/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll
+++ b/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll
@@ -35,8 +35,8 @@ define signext i32 @zeroEqualityTest02(i8* %x, i8* %y) {
 define signext i32 @zeroEqualityTest01(i8* %x, i8* %y) {
 ; CHECK-LABEL: zeroEqualityTest01:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    ld 5, 0(3)
-; CHECK-NEXT:    ld 6, 0(4)
+; CHECK-NEXT:    ldx 5, 0, 3
+; CHECK-NEXT:    ldx 6, 0, 4
 ; CHECK-NEXT:    cmpld 5, 6
 ; CHECK-NEXT:    bne 0, .LBB1_2
 ; CHECK-NEXT:  # %bb.1: # %loadbb1
@@ -125,7 +125,7 @@ define signext i32 @equalityFoldTwoConstants() {
 define signext i32 @equalityFoldOneConstant(i8* %X) {
 ; CHECK-LABEL: equalityFoldOneConstant:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    ld 4, 0(3)
+; CHECK-NEXT:    ldx 4, 0, 3
 ; CHECK-NEXT:    li 5, 1
 ; CHECK-NEXT:    sldi 5, 5, 32
 ; CHECK-NEXT:    cmpld 4, 5

diff  --git a/llvm/test/CodeGen/PowerPC/memcmp-mergeexpand.ll b/llvm/test/CodeGen/PowerPC/memcmp-mergeexpand.ll
index 298ce90b74ee..181683e7a7f4 100644
--- a/llvm/test/CodeGen/PowerPC/memcmp-mergeexpand.ll
+++ b/llvm/test/CodeGen/PowerPC/memcmp-mergeexpand.ll
@@ -8,8 +8,8 @@
 define zeroext i1 @opeq1(
 ; PPC64LE-LABEL: opeq1:
 ; PPC64LE:       # %bb.0: # %"entry+land.rhs.i"
-; PPC64LE-NEXT:    ld 3, 0(3)
-; PPC64LE-NEXT:    ld 4, 0(4)
+; PPC64LE-NEXT:    ldx 3, 0, 3
+; PPC64LE-NEXT:    ldx 4, 0, 4
 ; PPC64LE-NEXT:    xor 3, 3, 4
 ; PPC64LE-NEXT:    cntlzd 3, 3
 ; PPC64LE-NEXT:    rldicl 3, 3, 58, 63

diff  --git a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll
index dd3bb3973754..b8cfe04d43cb 100644
--- a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll
+++ b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll
@@ -9,8 +9,8 @@ define i32 @cmp2(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; ALL-LABEL: @cmp2(
 ; ALL-NEXT:    [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i16*
 ; ALL-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i16*
-; ALL-NEXT:    [[TMP3:%.*]] = load i16, i16* [[TMP1]]
-; ALL-NEXT:    [[TMP4:%.*]] = load i16, i16* [[TMP2]]
+; ALL-NEXT:    [[TMP3:%.*]] = load i16, i16* [[TMP1]], align 1
+; ALL-NEXT:    [[TMP4:%.*]] = load i16, i16* [[TMP2]], align 1
 ; ALL-NEXT:    [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
 ; ALL-NEXT:    [[TMP6:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
 ; ALL-NEXT:    [[TMP7:%.*]] = zext i16 [[TMP5]] to i32
@@ -26,8 +26,8 @@ define i32 @cmp2_align2(i8* nocapture readonly align 2 %x, i8* nocapture readonl
 ; ALL-LABEL: @cmp2_align2(
 ; ALL-NEXT:    [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i16*
 ; ALL-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i16*
-; ALL-NEXT:    [[TMP3:%.*]] = load i16, i16* [[TMP1]]
-; ALL-NEXT:    [[TMP4:%.*]] = load i16, i16* [[TMP2]]
+; ALL-NEXT:    [[TMP3:%.*]] = load i16, i16* [[TMP1]], align 2
+; ALL-NEXT:    [[TMP4:%.*]] = load i16, i16* [[TMP2]], align 2
 ; ALL-NEXT:    [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
 ; ALL-NEXT:    [[TMP6:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
 ; ALL-NEXT:    [[TMP7:%.*]] = zext i16 [[TMP5]] to i32
@@ -49,8 +49,8 @@ define i32 @cmp3(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; ALL:       loadbb:
 ; ALL-NEXT:    [[TMP3:%.*]] = bitcast i8* [[X:%.*]] to i16*
 ; ALL-NEXT:    [[TMP4:%.*]] = bitcast i8* [[Y:%.*]] to i16*
-; ALL-NEXT:    [[TMP5:%.*]] = load i16, i16* [[TMP3]]
-; ALL-NEXT:    [[TMP6:%.*]] = load i16, i16* [[TMP4]]
+; ALL-NEXT:    [[TMP5:%.*]] = load i16, i16* [[TMP3]], align 1
+; ALL-NEXT:    [[TMP6:%.*]] = load i16, i16* [[TMP4]], align 1
 ; ALL-NEXT:    [[TMP7]] = call i16 @llvm.bswap.i16(i16 [[TMP5]])
 ; ALL-NEXT:    [[TMP8]] = call i16 @llvm.bswap.i16(i16 [[TMP6]])
 ; ALL-NEXT:    [[TMP9:%.*]] = icmp eq i16 [[TMP7]], [[TMP8]]
@@ -58,8 +58,8 @@ define i32 @cmp3(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; ALL:       loadbb1:
 ; ALL-NEXT:    [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i64 2
 ; ALL-NEXT:    [[TMP11:%.*]] = getelementptr i8, i8* [[Y]], i64 2
-; ALL-NEXT:    [[TMP12:%.*]] = load i8, i8* [[TMP10]]
-; ALL-NEXT:    [[TMP13:%.*]] = load i8, i8* [[TMP11]]
+; ALL-NEXT:    [[TMP12:%.*]] = load i8, i8* [[TMP10]], align 1
+; ALL-NEXT:    [[TMP13:%.*]] = load i8, i8* [[TMP11]], align 1
 ; ALL-NEXT:    [[TMP14:%.*]] = zext i8 [[TMP12]] to i32
 ; ALL-NEXT:    [[TMP15:%.*]] = zext i8 [[TMP13]] to i32
 ; ALL-NEXT:    [[TMP16:%.*]] = sub i32 [[TMP14]], [[TMP15]]
@@ -76,8 +76,8 @@ define i32 @cmp4(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; ALL-LABEL: @cmp4(
 ; ALL-NEXT:    [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
 ; ALL-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; ALL-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; ALL-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP2]]
+; ALL-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 1
+; ALL-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 1
 ; ALL-NEXT:    [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
 ; ALL-NEXT:    [[TMP6:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
 ; ALL-NEXT:    [[TMP7:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]]
@@ -101,8 +101,8 @@ define i32 @cmp5(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; ALL:       loadbb:
 ; ALL-NEXT:    [[TMP3:%.*]] = bitcast i8* [[X:%.*]] to i32*
 ; ALL-NEXT:    [[TMP4:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; ALL-NEXT:    [[TMP5:%.*]] = load i32, i32* [[TMP3]]
-; ALL-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP4]]
+; ALL-NEXT:    [[TMP5:%.*]] = load i32, i32* [[TMP3]], align 1
+; ALL-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 1
 ; ALL-NEXT:    [[TMP7]] = call i32 @llvm.bswap.i32(i32 [[TMP5]])
 ; ALL-NEXT:    [[TMP8]] = call i32 @llvm.bswap.i32(i32 [[TMP6]])
 ; ALL-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]]
@@ -110,8 +110,8 @@ define i32 @cmp5(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; ALL:       loadbb1:
 ; ALL-NEXT:    [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i64 4
 ; ALL-NEXT:    [[TMP11:%.*]] = getelementptr i8, i8* [[Y]], i64 4
-; ALL-NEXT:    [[TMP12:%.*]] = load i8, i8* [[TMP10]]
-; ALL-NEXT:    [[TMP13:%.*]] = load i8, i8* [[TMP11]]
+; ALL-NEXT:    [[TMP12:%.*]] = load i8, i8* [[TMP10]], align 1
+; ALL-NEXT:    [[TMP13:%.*]] = load i8, i8* [[TMP11]], align 1
 ; ALL-NEXT:    [[TMP14:%.*]] = zext i8 [[TMP12]] to i32
 ; ALL-NEXT:    [[TMP15:%.*]] = zext i8 [[TMP13]] to i32
 ; ALL-NEXT:    [[TMP16:%.*]] = sub i32 [[TMP14]], [[TMP15]]
@@ -136,8 +136,8 @@ define i32 @cmp6(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; ALL:       loadbb:
 ; ALL-NEXT:    [[TMP3:%.*]] = bitcast i8* [[X:%.*]] to i32*
 ; ALL-NEXT:    [[TMP4:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; ALL-NEXT:    [[TMP5:%.*]] = load i32, i32* [[TMP3]]
-; ALL-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP4]]
+; ALL-NEXT:    [[TMP5:%.*]] = load i32, i32* [[TMP3]], align 1
+; ALL-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 1
 ; ALL-NEXT:    [[TMP7]] = call i32 @llvm.bswap.i32(i32 [[TMP5]])
 ; ALL-NEXT:    [[TMP8]] = call i32 @llvm.bswap.i32(i32 [[TMP6]])
 ; ALL-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]]
@@ -147,8 +147,8 @@ define i32 @cmp6(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; ALL-NEXT:    [[TMP11:%.*]] = getelementptr i8, i8* [[Y]], i64 4
 ; ALL-NEXT:    [[TMP12:%.*]] = bitcast i8* [[TMP10]] to i16*
 ; ALL-NEXT:    [[TMP13:%.*]] = bitcast i8* [[TMP11]] to i16*
-; ALL-NEXT:    [[TMP14:%.*]] = load i16, i16* [[TMP12]]
-; ALL-NEXT:    [[TMP15:%.*]] = load i16, i16* [[TMP13]]
+; ALL-NEXT:    [[TMP14:%.*]] = load i16, i16* [[TMP12]], align 1
+; ALL-NEXT:    [[TMP15:%.*]] = load i16, i16* [[TMP13]], align 1
 ; ALL-NEXT:    [[TMP16:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP14]])
 ; ALL-NEXT:    [[TMP17:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP15]])
 ; ALL-NEXT:    [[TMP18]] = zext i16 [[TMP16]] to i32
@@ -184,8 +184,8 @@ define i32 @cmp8(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; X32:       loadbb:
 ; X32-NEXT:    [[TMP3:%.*]] = bitcast i8* [[X:%.*]] to i32*
 ; X32-NEXT:    [[TMP4:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X32-NEXT:    [[TMP5:%.*]] = load i32, i32* [[TMP3]]
-; X32-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP4]]
+; X32-NEXT:    [[TMP5:%.*]] = load i32, i32* [[TMP3]], align 1
+; X32-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 1
 ; X32-NEXT:    [[TMP7]] = call i32 @llvm.bswap.i32(i32 [[TMP5]])
 ; X32-NEXT:    [[TMP8]] = call i32 @llvm.bswap.i32(i32 [[TMP6]])
 ; X32-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]]
@@ -195,8 +195,8 @@ define i32 @cmp8(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; X32-NEXT:    [[TMP11:%.*]] = getelementptr i8, i8* [[Y]], i64 4
 ; X32-NEXT:    [[TMP12:%.*]] = bitcast i8* [[TMP10]] to i32*
 ; X32-NEXT:    [[TMP13:%.*]] = bitcast i8* [[TMP11]] to i32*
-; X32-NEXT:    [[TMP14:%.*]] = load i32, i32* [[TMP12]]
-; X32-NEXT:    [[TMP15:%.*]] = load i32, i32* [[TMP13]]
+; X32-NEXT:    [[TMP14:%.*]] = load i32, i32* [[TMP12]], align 1
+; X32-NEXT:    [[TMP15:%.*]] = load i32, i32* [[TMP13]], align 1
 ; X32-NEXT:    [[TMP16]] = call i32 @llvm.bswap.i32(i32 [[TMP14]])
 ; X32-NEXT:    [[TMP17]] = call i32 @llvm.bswap.i32(i32 [[TMP15]])
 ; X32-NEXT:    [[TMP18:%.*]] = icmp eq i32 [[TMP16]], [[TMP17]]
@@ -208,8 +208,8 @@ define i32 @cmp8(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; X64-LABEL: @cmp8(
 ; X64-NEXT:    [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
 ; X64-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64-NEXT:    [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
+; X64-NEXT:    [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
 ; X64-NEXT:    [[TMP5:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
 ; X64-NEXT:    [[TMP6:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
 ; X64-NEXT:    [[TMP7:%.*]] = icmp ugt i64 [[TMP5]], [[TMP6]]
@@ -237,8 +237,8 @@ define i32 @cmp9(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; X64:       loadbb:
 ; X64-NEXT:    [[TMP3:%.*]] = bitcast i8* [[X:%.*]] to i64*
 ; X64-NEXT:    [[TMP4:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64-NEXT:    [[TMP5:%.*]] = load i64, i64* [[TMP3]]
-; X64-NEXT:    [[TMP6:%.*]] = load i64, i64* [[TMP4]]
+; X64-NEXT:    [[TMP5:%.*]] = load i64, i64* [[TMP3]], align 1
+; X64-NEXT:    [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 1
 ; X64-NEXT:    [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]])
 ; X64-NEXT:    [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]])
 ; X64-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]]
@@ -246,8 +246,8 @@ define i32 @cmp9(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; X64:       loadbb1:
 ; X64-NEXT:    [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i64 8
 ; X64-NEXT:    [[TMP11:%.*]] = getelementptr i8, i8* [[Y]], i64 8
-; X64-NEXT:    [[TMP12:%.*]] = load i8, i8* [[TMP10]]
-; X64-NEXT:    [[TMP13:%.*]] = load i8, i8* [[TMP11]]
+; X64-NEXT:    [[TMP12:%.*]] = load i8, i8* [[TMP10]], align 1
+; X64-NEXT:    [[TMP13:%.*]] = load i8, i8* [[TMP11]], align 1
 ; X64-NEXT:    [[TMP14:%.*]] = zext i8 [[TMP12]] to i32
 ; X64-NEXT:    [[TMP15:%.*]] = zext i8 [[TMP13]] to i32
 ; X64-NEXT:    [[TMP16:%.*]] = sub i32 [[TMP14]], [[TMP15]]
@@ -276,8 +276,8 @@ define i32 @cmp10(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; X64:       loadbb:
 ; X64-NEXT:    [[TMP3:%.*]] = bitcast i8* [[X:%.*]] to i64*
 ; X64-NEXT:    [[TMP4:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64-NEXT:    [[TMP5:%.*]] = load i64, i64* [[TMP3]]
-; X64-NEXT:    [[TMP6:%.*]] = load i64, i64* [[TMP4]]
+; X64-NEXT:    [[TMP5:%.*]] = load i64, i64* [[TMP3]], align 1
+; X64-NEXT:    [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 1
 ; X64-NEXT:    [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]])
 ; X64-NEXT:    [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]])
 ; X64-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]]
@@ -287,8 +287,8 @@ define i32 @cmp10(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; X64-NEXT:    [[TMP11:%.*]] = getelementptr i8, i8* [[Y]], i64 8
 ; X64-NEXT:    [[TMP12:%.*]] = bitcast i8* [[TMP10]] to i16*
 ; X64-NEXT:    [[TMP13:%.*]] = bitcast i8* [[TMP11]] to i16*
-; X64-NEXT:    [[TMP14:%.*]] = load i16, i16* [[TMP12]]
-; X64-NEXT:    [[TMP15:%.*]] = load i16, i16* [[TMP13]]
+; X64-NEXT:    [[TMP14:%.*]] = load i16, i16* [[TMP12]], align 1
+; X64-NEXT:    [[TMP15:%.*]] = load i16, i16* [[TMP13]], align 1
 ; X64-NEXT:    [[TMP16:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP14]])
 ; X64-NEXT:    [[TMP17:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP15]])
 ; X64-NEXT:    [[TMP18]] = zext i16 [[TMP16]] to i64
@@ -328,8 +328,8 @@ define i32 @cmp12(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; X64:       loadbb:
 ; X64-NEXT:    [[TMP3:%.*]] = bitcast i8* [[X:%.*]] to i64*
 ; X64-NEXT:    [[TMP4:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64-NEXT:    [[TMP5:%.*]] = load i64, i64* [[TMP3]]
-; X64-NEXT:    [[TMP6:%.*]] = load i64, i64* [[TMP4]]
+; X64-NEXT:    [[TMP5:%.*]] = load i64, i64* [[TMP3]], align 1
+; X64-NEXT:    [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 1
 ; X64-NEXT:    [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]])
 ; X64-NEXT:    [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]])
 ; X64-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]]
@@ -339,8 +339,8 @@ define i32 @cmp12(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; X64-NEXT:    [[TMP11:%.*]] = getelementptr i8, i8* [[Y]], i64 8
 ; X64-NEXT:    [[TMP12:%.*]] = bitcast i8* [[TMP10]] to i32*
 ; X64-NEXT:    [[TMP13:%.*]] = bitcast i8* [[TMP11]] to i32*
-; X64-NEXT:    [[TMP14:%.*]] = load i32, i32* [[TMP12]]
-; X64-NEXT:    [[TMP15:%.*]] = load i32, i32* [[TMP13]]
+; X64-NEXT:    [[TMP14:%.*]] = load i32, i32* [[TMP12]], align 1
+; X64-NEXT:    [[TMP15:%.*]] = load i32, i32* [[TMP13]], align 1
 ; X64-NEXT:    [[TMP16:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP14]])
 ; X64-NEXT:    [[TMP17:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP15]])
 ; X64-NEXT:    [[TMP18]] = zext i32 [[TMP16]] to i64
@@ -398,8 +398,8 @@ define i32 @cmp16(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; X64:       loadbb:
 ; X64-NEXT:    [[TMP3:%.*]] = bitcast i8* [[X:%.*]] to i64*
 ; X64-NEXT:    [[TMP4:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64-NEXT:    [[TMP5:%.*]] = load i64, i64* [[TMP3]]
-; X64-NEXT:    [[TMP6:%.*]] = load i64, i64* [[TMP4]]
+; X64-NEXT:    [[TMP5:%.*]] = load i64, i64* [[TMP3]], align 1
+; X64-NEXT:    [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 1
 ; X64-NEXT:    [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]])
 ; X64-NEXT:    [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]])
 ; X64-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]]
@@ -409,8 +409,8 @@ define i32 @cmp16(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; X64-NEXT:    [[TMP11:%.*]] = getelementptr i8, i8* [[Y]], i64 8
 ; X64-NEXT:    [[TMP12:%.*]] = bitcast i8* [[TMP10]] to i64*
 ; X64-NEXT:    [[TMP13:%.*]] = bitcast i8* [[TMP11]] to i64*
-; X64-NEXT:    [[TMP14:%.*]] = load i64, i64* [[TMP12]]
-; X64-NEXT:    [[TMP15:%.*]] = load i64, i64* [[TMP13]]
+; X64-NEXT:    [[TMP14:%.*]] = load i64, i64* [[TMP12]], align 1
+; X64-NEXT:    [[TMP15:%.*]] = load i64, i64* [[TMP13]], align 1
 ; X64-NEXT:    [[TMP16]] = call i64 @llvm.bswap.i64(i64 [[TMP14]])
 ; X64-NEXT:    [[TMP17]] = call i64 @llvm.bswap.i64(i64 [[TMP15]])
 ; X64-NEXT:    [[TMP18:%.*]] = icmp eq i64 [[TMP16]], [[TMP17]]
@@ -427,8 +427,8 @@ define i32 @cmp_eq2(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; ALL-LABEL: @cmp_eq2(
 ; ALL-NEXT:    [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i16*
 ; ALL-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i16*
-; ALL-NEXT:    [[TMP3:%.*]] = load i16, i16* [[TMP1]]
-; ALL-NEXT:    [[TMP4:%.*]] = load i16, i16* [[TMP2]]
+; ALL-NEXT:    [[TMP3:%.*]] = load i16, i16* [[TMP1]], align 1
+; ALL-NEXT:    [[TMP4:%.*]] = load i16, i16* [[TMP2]], align 1
 ; ALL-NEXT:    [[TMP5:%.*]] = icmp ne i16 [[TMP3]], [[TMP4]]
 ; ALL-NEXT:    [[TMP6:%.*]] = zext i1 [[TMP5]] to i32
 ; ALL-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0
@@ -445,13 +445,13 @@ define i32 @cmp_eq3(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; X32-LABEL: @cmp_eq3(
 ; X32-NEXT:    [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i16*
 ; X32-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i16*
-; X32-NEXT:    [[TMP3:%.*]] = load i16, i16* [[TMP1]]
-; X32-NEXT:    [[TMP4:%.*]] = load i16, i16* [[TMP2]]
+; X32-NEXT:    [[TMP3:%.*]] = load i16, i16* [[TMP1]], align 1
+; X32-NEXT:    [[TMP4:%.*]] = load i16, i16* [[TMP2]], align 1
 ; X32-NEXT:    [[TMP5:%.*]] = xor i16 [[TMP3]], [[TMP4]]
 ; X32-NEXT:    [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 2
 ; X32-NEXT:    [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 2
-; X32-NEXT:    [[TMP8:%.*]] = load i8, i8* [[TMP6]]
-; X32-NEXT:    [[TMP9:%.*]] = load i8, i8* [[TMP7]]
+; X32-NEXT:    [[TMP8:%.*]] = load i8, i8* [[TMP6]], align 1
+; X32-NEXT:    [[TMP9:%.*]] = load i8, i8* [[TMP7]], align 1
 ; X32-NEXT:    [[TMP10:%.*]] = zext i8 [[TMP8]] to i16
 ; X32-NEXT:    [[TMP11:%.*]] = zext i8 [[TMP9]] to i16
 ; X32-NEXT:    [[TMP12:%.*]] = xor i16 [[TMP10]], [[TMP11]]
@@ -469,15 +469,15 @@ define i32 @cmp_eq3(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; X64_1LD:       loadbb:
 ; X64_1LD-NEXT:    [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i16*
 ; X64_1LD-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i16*
-; X64_1LD-NEXT:    [[TMP3:%.*]] = load i16, i16* [[TMP1]]
-; X64_1LD-NEXT:    [[TMP4:%.*]] = load i16, i16* [[TMP2]]
+; X64_1LD-NEXT:    [[TMP3:%.*]] = load i16, i16* [[TMP1]], align 1
+; X64_1LD-NEXT:    [[TMP4:%.*]] = load i16, i16* [[TMP2]], align 1
 ; X64_1LD-NEXT:    [[TMP5:%.*]] = icmp ne i16 [[TMP3]], [[TMP4]]
 ; X64_1LD-NEXT:    br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
 ; X64_1LD:       loadbb1:
 ; X64_1LD-NEXT:    [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 2
 ; X64_1LD-NEXT:    [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 2
-; X64_1LD-NEXT:    [[TMP8:%.*]] = load i8, i8* [[TMP6]]
-; X64_1LD-NEXT:    [[TMP9:%.*]] = load i8, i8* [[TMP7]]
+; X64_1LD-NEXT:    [[TMP8:%.*]] = load i8, i8* [[TMP6]], align 1
+; X64_1LD-NEXT:    [[TMP9:%.*]] = load i8, i8* [[TMP7]], align 1
 ; X64_1LD-NEXT:    [[TMP10:%.*]] = icmp ne i8 [[TMP8]], [[TMP9]]
 ; X64_1LD-NEXT:    br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]]
 ; X64_1LD:       endblock:
@@ -489,13 +489,13 @@ define i32 @cmp_eq3(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; X64_2LD-LABEL: @cmp_eq3(
 ; X64_2LD-NEXT:    [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i16*
 ; X64_2LD-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i16*
-; X64_2LD-NEXT:    [[TMP3:%.*]] = load i16, i16* [[TMP1]]
-; X64_2LD-NEXT:    [[TMP4:%.*]] = load i16, i16* [[TMP2]]
+; X64_2LD-NEXT:    [[TMP3:%.*]] = load i16, i16* [[TMP1]], align 1
+; X64_2LD-NEXT:    [[TMP4:%.*]] = load i16, i16* [[TMP2]], align 1
 ; X64_2LD-NEXT:    [[TMP5:%.*]] = xor i16 [[TMP3]], [[TMP4]]
 ; X64_2LD-NEXT:    [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 2
 ; X64_2LD-NEXT:    [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 2
-; X64_2LD-NEXT:    [[TMP8:%.*]] = load i8, i8* [[TMP6]]
-; X64_2LD-NEXT:    [[TMP9:%.*]] = load i8, i8* [[TMP7]]
+; X64_2LD-NEXT:    [[TMP8:%.*]] = load i8, i8* [[TMP6]], align 1
+; X64_2LD-NEXT:    [[TMP9:%.*]] = load i8, i8* [[TMP7]], align 1
 ; X64_2LD-NEXT:    [[TMP10:%.*]] = zext i8 [[TMP8]] to i16
 ; X64_2LD-NEXT:    [[TMP11:%.*]] = zext i8 [[TMP9]] to i16
 ; X64_2LD-NEXT:    [[TMP12:%.*]] = xor i16 [[TMP10]], [[TMP11]]
@@ -516,8 +516,8 @@ define i32 @cmp_eq4(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; ALL-LABEL: @cmp_eq4(
 ; ALL-NEXT:    [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
 ; ALL-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; ALL-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; ALL-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP2]]
+; ALL-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 1
+; ALL-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 1
 ; ALL-NEXT:    [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
 ; ALL-NEXT:    [[TMP6:%.*]] = zext i1 [[TMP5]] to i32
 ; ALL-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0
@@ -534,13 +534,13 @@ define i32 @cmp_eq5(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; X32-LABEL: @cmp_eq5(
 ; X32-NEXT:    [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
 ; X32-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X32-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; X32-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP2]]
+; X32-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 1
+; X32-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 1
 ; X32-NEXT:    [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
 ; X32-NEXT:    [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 4
 ; X32-NEXT:    [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 4
-; X32-NEXT:    [[TMP8:%.*]] = load i8, i8* [[TMP6]]
-; X32-NEXT:    [[TMP9:%.*]] = load i8, i8* [[TMP7]]
+; X32-NEXT:    [[TMP8:%.*]] = load i8, i8* [[TMP6]], align 1
+; X32-NEXT:    [[TMP9:%.*]] = load i8, i8* [[TMP7]], align 1
 ; X32-NEXT:    [[TMP10:%.*]] = zext i8 [[TMP8]] to i32
 ; X32-NEXT:    [[TMP11:%.*]] = zext i8 [[TMP9]] to i32
 ; X32-NEXT:    [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]
@@ -558,15 +558,15 @@ define i32 @cmp_eq5(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; X64_1LD:       loadbb:
 ; X64_1LD-NEXT:    [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
 ; X64_1LD-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X64_1LD-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; X64_1LD-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP2]]
+; X64_1LD-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 1
+; X64_1LD-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 1
 ; X64_1LD-NEXT:    [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
 ; X64_1LD-NEXT:    br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
 ; X64_1LD:       loadbb1:
 ; X64_1LD-NEXT:    [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 4
 ; X64_1LD-NEXT:    [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 4
-; X64_1LD-NEXT:    [[TMP8:%.*]] = load i8, i8* [[TMP6]]
-; X64_1LD-NEXT:    [[TMP9:%.*]] = load i8, i8* [[TMP7]]
+; X64_1LD-NEXT:    [[TMP8:%.*]] = load i8, i8* [[TMP6]], align 1
+; X64_1LD-NEXT:    [[TMP9:%.*]] = load i8, i8* [[TMP7]], align 1
 ; X64_1LD-NEXT:    [[TMP10:%.*]] = icmp ne i8 [[TMP8]], [[TMP9]]
 ; X64_1LD-NEXT:    br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]]
 ; X64_1LD:       endblock:
@@ -578,13 +578,13 @@ define i32 @cmp_eq5(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; X64_2LD-LABEL: @cmp_eq5(
 ; X64_2LD-NEXT:    [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
 ; X64_2LD-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X64_2LD-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; X64_2LD-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP2]]
+; X64_2LD-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 1
+; X64_2LD-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 1
 ; X64_2LD-NEXT:    [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
 ; X64_2LD-NEXT:    [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 4
 ; X64_2LD-NEXT:    [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 4
-; X64_2LD-NEXT:    [[TMP8:%.*]] = load i8, i8* [[TMP6]]
-; X64_2LD-NEXT:    [[TMP9:%.*]] = load i8, i8* [[TMP7]]
+; X64_2LD-NEXT:    [[TMP8:%.*]] = load i8, i8* [[TMP6]], align 1
+; X64_2LD-NEXT:    [[TMP9:%.*]] = load i8, i8* [[TMP7]], align 1
 ; X64_2LD-NEXT:    [[TMP10:%.*]] = zext i8 [[TMP8]] to i32
 ; X64_2LD-NEXT:    [[TMP11:%.*]] = zext i8 [[TMP9]] to i32
 ; X64_2LD-NEXT:    [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]
@@ -605,15 +605,15 @@ define i32 @cmp_eq6(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; X32-LABEL: @cmp_eq6(
 ; X32-NEXT:    [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
 ; X32-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X32-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; X32-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP2]]
+; X32-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 1
+; X32-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 1
 ; X32-NEXT:    [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
 ; X32-NEXT:    [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 4
 ; X32-NEXT:    [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 4
 ; X32-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i16*
 ; X32-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i16*
-; X32-NEXT:    [[TMP10:%.*]] = load i16, i16* [[TMP8]]
-; X32-NEXT:    [[TMP11:%.*]] = load i16, i16* [[TMP9]]
+; X32-NEXT:    [[TMP10:%.*]] = load i16, i16* [[TMP8]], align 1
+; X32-NEXT:    [[TMP11:%.*]] = load i16, i16* [[TMP9]], align 1
 ; X32-NEXT:    [[TMP12:%.*]] = zext i16 [[TMP10]] to i32
 ; X32-NEXT:    [[TMP13:%.*]] = zext i16 [[TMP11]] to i32
 ; X32-NEXT:    [[TMP14:%.*]] = xor i32 [[TMP12]], [[TMP13]]
@@ -631,8 +631,8 @@ define i32 @cmp_eq6(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; X64_1LD:       loadbb:
 ; X64_1LD-NEXT:    [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
 ; X64_1LD-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X64_1LD-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; X64_1LD-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP2]]
+; X64_1LD-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 1
+; X64_1LD-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 1
 ; X64_1LD-NEXT:    [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
 ; X64_1LD-NEXT:    br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
 ; X64_1LD:       loadbb1:
@@ -640,8 +640,8 @@ define i32 @cmp_eq6(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; X64_1LD-NEXT:    [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 4
 ; X64_1LD-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i16*
 ; X64_1LD-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i16*
-; X64_1LD-NEXT:    [[TMP10:%.*]] = load i16, i16* [[TMP8]]
-; X64_1LD-NEXT:    [[TMP11:%.*]] = load i16, i16* [[TMP9]]
+; X64_1LD-NEXT:    [[TMP10:%.*]] = load i16, i16* [[TMP8]], align 1
+; X64_1LD-NEXT:    [[TMP11:%.*]] = load i16, i16* [[TMP9]], align 1
 ; X64_1LD-NEXT:    [[TMP12:%.*]] = icmp ne i16 [[TMP10]], [[TMP11]]
 ; X64_1LD-NEXT:    br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
 ; X64_1LD:       endblock:
@@ -653,15 +653,15 @@ define i32 @cmp_eq6(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; X64_2LD-LABEL: @cmp_eq6(
 ; X64_2LD-NEXT:    [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
 ; X64_2LD-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X64_2LD-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; X64_2LD-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP2]]
+; X64_2LD-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 1
+; X64_2LD-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 1
 ; X64_2LD-NEXT:    [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
 ; X64_2LD-NEXT:    [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 4
 ; X64_2LD-NEXT:    [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 4
 ; X64_2LD-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i16*
 ; X64_2LD-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i16*
-; X64_2LD-NEXT:    [[TMP10:%.*]] = load i16, i16* [[TMP8]]
-; X64_2LD-NEXT:    [[TMP11:%.*]] = load i16, i16* [[TMP9]]
+; X64_2LD-NEXT:    [[TMP10:%.*]] = load i16, i16* [[TMP8]], align 1
+; X64_2LD-NEXT:    [[TMP11:%.*]] = load i16, i16* [[TMP9]], align 1
 ; X64_2LD-NEXT:    [[TMP12:%.*]] = zext i16 [[TMP10]] to i32
 ; X64_2LD-NEXT:    [[TMP13:%.*]] = zext i16 [[TMP11]] to i32
 ; X64_2LD-NEXT:    [[TMP14:%.*]] = xor i32 [[TMP12]], [[TMP13]]
@@ -682,15 +682,15 @@ define i32 @cmp_eq6_align4(i8* nocapture readonly align 4 %x, i8* nocapture read
 ; X32-LABEL: @cmp_eq6_align4(
 ; X32-NEXT:    [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
 ; X32-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X32-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; X32-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP2]]
+; X32-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 4
+; X32-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 4
 ; X32-NEXT:    [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
 ; X32-NEXT:    [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 4
 ; X32-NEXT:    [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 4
 ; X32-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i16*
 ; X32-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i16*
-; X32-NEXT:    [[TMP10:%.*]] = load i16, i16* [[TMP8]]
-; X32-NEXT:    [[TMP11:%.*]] = load i16, i16* [[TMP9]]
+; X32-NEXT:    [[TMP10:%.*]] = load i16, i16* [[TMP8]], align 4
+; X32-NEXT:    [[TMP11:%.*]] = load i16, i16* [[TMP9]], align 4
 ; X32-NEXT:    [[TMP12:%.*]] = zext i16 [[TMP10]] to i32
 ; X32-NEXT:    [[TMP13:%.*]] = zext i16 [[TMP11]] to i32
 ; X32-NEXT:    [[TMP14:%.*]] = xor i32 [[TMP12]], [[TMP13]]
@@ -708,8 +708,8 @@ define i32 @cmp_eq6_align4(i8* nocapture readonly align 4 %x, i8* nocapture read
 ; X64_1LD:       loadbb:
 ; X64_1LD-NEXT:    [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
 ; X64_1LD-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X64_1LD-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; X64_1LD-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP2]]
+; X64_1LD-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 4
+; X64_1LD-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 4
 ; X64_1LD-NEXT:    [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
 ; X64_1LD-NEXT:    br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
 ; X64_1LD:       loadbb1:
@@ -717,8 +717,8 @@ define i32 @cmp_eq6_align4(i8* nocapture readonly align 4 %x, i8* nocapture read
 ; X64_1LD-NEXT:    [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 4
 ; X64_1LD-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i16*
 ; X64_1LD-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i16*
-; X64_1LD-NEXT:    [[TMP10:%.*]] = load i16, i16* [[TMP8]]
-; X64_1LD-NEXT:    [[TMP11:%.*]] = load i16, i16* [[TMP9]]
+; X64_1LD-NEXT:    [[TMP10:%.*]] = load i16, i16* [[TMP8]], align 4
+; X64_1LD-NEXT:    [[TMP11:%.*]] = load i16, i16* [[TMP9]], align 4
 ; X64_1LD-NEXT:    [[TMP12:%.*]] = icmp ne i16 [[TMP10]], [[TMP11]]
 ; X64_1LD-NEXT:    br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
 ; X64_1LD:       endblock:
@@ -730,15 +730,15 @@ define i32 @cmp_eq6_align4(i8* nocapture readonly align 4 %x, i8* nocapture read
 ; X64_2LD-LABEL: @cmp_eq6_align4(
 ; X64_2LD-NEXT:    [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
 ; X64_2LD-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X64_2LD-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; X64_2LD-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP2]]
+; X64_2LD-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 4
+; X64_2LD-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 4
 ; X64_2LD-NEXT:    [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
 ; X64_2LD-NEXT:    [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 4
 ; X64_2LD-NEXT:    [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 4
 ; X64_2LD-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i16*
 ; X64_2LD-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i16*
-; X64_2LD-NEXT:    [[TMP10:%.*]] = load i16, i16* [[TMP8]]
-; X64_2LD-NEXT:    [[TMP11:%.*]] = load i16, i16* [[TMP9]]
+; X64_2LD-NEXT:    [[TMP10:%.*]] = load i16, i16* [[TMP8]], align 4
+; X64_2LD-NEXT:    [[TMP11:%.*]] = load i16, i16* [[TMP9]], align 4
 ; X64_2LD-NEXT:    [[TMP12:%.*]] = zext i16 [[TMP10]] to i32
 ; X64_2LD-NEXT:    [[TMP13:%.*]] = zext i16 [[TMP11]] to i32
 ; X64_2LD-NEXT:    [[TMP14:%.*]] = xor i32 [[TMP12]], [[TMP13]]
@@ -759,15 +759,15 @@ define i32 @cmp_eq7(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; X32-LABEL: @cmp_eq7(
 ; X32-NEXT:    [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
 ; X32-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X32-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; X32-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP2]]
+; X32-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 1
+; X32-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 1
 ; X32-NEXT:    [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
 ; X32-NEXT:    [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 3
 ; X32-NEXT:    [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 3
 ; X32-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i32*
 ; X32-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i32*
-; X32-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP8]]
-; X32-NEXT:    [[TMP11:%.*]] = load i32, i32* [[TMP9]]
+; X32-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP8]], align 1
+; X32-NEXT:    [[TMP11:%.*]] = load i32, i32* [[TMP9]], align 1
 ; X32-NEXT:    [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]
 ; X32-NEXT:    [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]
 ; X32-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
@@ -783,8 +783,8 @@ define i32 @cmp_eq7(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; X64_1LD:       loadbb:
 ; X64_1LD-NEXT:    [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
 ; X64_1LD-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X64_1LD-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; X64_1LD-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP2]]
+; X64_1LD-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 1
+; X64_1LD-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 1
 ; X64_1LD-NEXT:    [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
 ; X64_1LD-NEXT:    br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
 ; X64_1LD:       loadbb1:
@@ -792,8 +792,8 @@ define i32 @cmp_eq7(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; X64_1LD-NEXT:    [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 3
 ; X64_1LD-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i32*
 ; X64_1LD-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i32*
-; X64_1LD-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP8]]
-; X64_1LD-NEXT:    [[TMP11:%.*]] = load i32, i32* [[TMP9]]
+; X64_1LD-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP8]], align 1
+; X64_1LD-NEXT:    [[TMP11:%.*]] = load i32, i32* [[TMP9]], align 1
 ; X64_1LD-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP10]], [[TMP11]]
 ; X64_1LD-NEXT:    br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
 ; X64_1LD:       endblock:
@@ -805,15 +805,15 @@ define i32 @cmp_eq7(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; X64_2LD-LABEL: @cmp_eq7(
 ; X64_2LD-NEXT:    [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
 ; X64_2LD-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X64_2LD-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; X64_2LD-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP2]]
+; X64_2LD-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 1
+; X64_2LD-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 1
 ; X64_2LD-NEXT:    [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
 ; X64_2LD-NEXT:    [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 3
 ; X64_2LD-NEXT:    [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 3
 ; X64_2LD-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i32*
 ; X64_2LD-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i32*
-; X64_2LD-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP8]]
-; X64_2LD-NEXT:    [[TMP11:%.*]] = load i32, i32* [[TMP9]]
+; X64_2LD-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP8]], align 1
+; X64_2LD-NEXT:    [[TMP11:%.*]] = load i32, i32* [[TMP9]], align 1
 ; X64_2LD-NEXT:    [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]
 ; X64_2LD-NEXT:    [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]
 ; X64_2LD-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
@@ -832,15 +832,15 @@ define i32 @cmp_eq8(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; X32-LABEL: @cmp_eq8(
 ; X32-NEXT:    [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
 ; X32-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X32-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; X32-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP2]]
+; X32-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 1
+; X32-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 1
 ; X32-NEXT:    [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
 ; X32-NEXT:    [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 4
 ; X32-NEXT:    [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 4
 ; X32-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i32*
 ; X32-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i32*
-; X32-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP8]]
-; X32-NEXT:    [[TMP11:%.*]] = load i32, i32* [[TMP9]]
+; X32-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP8]], align 1
+; X32-NEXT:    [[TMP11:%.*]] = load i32, i32* [[TMP9]], align 1
 ; X32-NEXT:    [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]
 ; X32-NEXT:    [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]
 ; X32-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
@@ -852,8 +852,8 @@ define i32 @cmp_eq8(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; X64-LABEL: @cmp_eq8(
 ; X64-NEXT:    [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
 ; X64-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64-NEXT:    [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
+; X64-NEXT:    [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
 ; X64-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
 ; X64-NEXT:    [[TMP6:%.*]] = zext i1 [[TMP5]] to i32
 ; X64-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0
@@ -880,15 +880,15 @@ define i32 @cmp_eq9(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; X64_1LD:       loadbb:
 ; X64_1LD-NEXT:    [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
 ; X64_1LD-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64_1LD-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64_1LD-NEXT:    [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64_1LD-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
+; X64_1LD-NEXT:    [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
 ; X64_1LD-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
 ; X64_1LD-NEXT:    br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
 ; X64_1LD:       loadbb1:
 ; X64_1LD-NEXT:    [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 8
 ; X64_1LD-NEXT:    [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 8
-; X64_1LD-NEXT:    [[TMP8:%.*]] = load i8, i8* [[TMP6]]
-; X64_1LD-NEXT:    [[TMP9:%.*]] = load i8, i8* [[TMP7]]
+; X64_1LD-NEXT:    [[TMP8:%.*]] = load i8, i8* [[TMP6]], align 1
+; X64_1LD-NEXT:    [[TMP9:%.*]] = load i8, i8* [[TMP7]], align 1
 ; X64_1LD-NEXT:    [[TMP10:%.*]] = icmp ne i8 [[TMP8]], [[TMP9]]
 ; X64_1LD-NEXT:    br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]]
 ; X64_1LD:       endblock:
@@ -900,13 +900,13 @@ define i32 @cmp_eq9(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; X64_2LD-LABEL: @cmp_eq9(
 ; X64_2LD-NEXT:    [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
 ; X64_2LD-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64_2LD-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64_2LD-NEXT:    [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64_2LD-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
+; X64_2LD-NEXT:    [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
 ; X64_2LD-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
 ; X64_2LD-NEXT:    [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 8
 ; X64_2LD-NEXT:    [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 8
-; X64_2LD-NEXT:    [[TMP8:%.*]] = load i8, i8* [[TMP6]]
-; X64_2LD-NEXT:    [[TMP9:%.*]] = load i8, i8* [[TMP7]]
+; X64_2LD-NEXT:    [[TMP8:%.*]] = load i8, i8* [[TMP6]], align 1
+; X64_2LD-NEXT:    [[TMP9:%.*]] = load i8, i8* [[TMP7]], align 1
 ; X64_2LD-NEXT:    [[TMP10:%.*]] = zext i8 [[TMP8]] to i64
 ; X64_2LD-NEXT:    [[TMP11:%.*]] = zext i8 [[TMP9]] to i64
 ; X64_2LD-NEXT:    [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]
@@ -937,8 +937,8 @@ define i32 @cmp_eq10(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; X64_1LD:       loadbb:
 ; X64_1LD-NEXT:    [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
 ; X64_1LD-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64_1LD-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64_1LD-NEXT:    [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64_1LD-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
+; X64_1LD-NEXT:    [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
 ; X64_1LD-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
 ; X64_1LD-NEXT:    br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
 ; X64_1LD:       loadbb1:
@@ -946,8 +946,8 @@ define i32 @cmp_eq10(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; X64_1LD-NEXT:    [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 8
 ; X64_1LD-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i16*
 ; X64_1LD-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i16*
-; X64_1LD-NEXT:    [[TMP10:%.*]] = load i16, i16* [[TMP8]]
-; X64_1LD-NEXT:    [[TMP11:%.*]] = load i16, i16* [[TMP9]]
+; X64_1LD-NEXT:    [[TMP10:%.*]] = load i16, i16* [[TMP8]], align 1
+; X64_1LD-NEXT:    [[TMP11:%.*]] = load i16, i16* [[TMP9]], align 1
 ; X64_1LD-NEXT:    [[TMP12:%.*]] = icmp ne i16 [[TMP10]], [[TMP11]]
 ; X64_1LD-NEXT:    br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
 ; X64_1LD:       endblock:
@@ -959,15 +959,15 @@ define i32 @cmp_eq10(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; X64_2LD-LABEL: @cmp_eq10(
 ; X64_2LD-NEXT:    [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
 ; X64_2LD-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64_2LD-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64_2LD-NEXT:    [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64_2LD-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
+; X64_2LD-NEXT:    [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
 ; X64_2LD-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
 ; X64_2LD-NEXT:    [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 8
 ; X64_2LD-NEXT:    [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 8
 ; X64_2LD-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i16*
 ; X64_2LD-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i16*
-; X64_2LD-NEXT:    [[TMP10:%.*]] = load i16, i16* [[TMP8]]
-; X64_2LD-NEXT:    [[TMP11:%.*]] = load i16, i16* [[TMP9]]
+; X64_2LD-NEXT:    [[TMP10:%.*]] = load i16, i16* [[TMP8]], align 1
+; X64_2LD-NEXT:    [[TMP11:%.*]] = load i16, i16* [[TMP9]], align 1
 ; X64_2LD-NEXT:    [[TMP12:%.*]] = zext i16 [[TMP10]] to i64
 ; X64_2LD-NEXT:    [[TMP13:%.*]] = zext i16 [[TMP11]] to i64
 ; X64_2LD-NEXT:    [[TMP14:%.*]] = xor i64 [[TMP12]], [[TMP13]]
@@ -998,8 +998,8 @@ define i32 @cmp_eq11(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; X64_1LD:       loadbb:
 ; X64_1LD-NEXT:    [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
 ; X64_1LD-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64_1LD-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64_1LD-NEXT:    [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64_1LD-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
+; X64_1LD-NEXT:    [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
 ; X64_1LD-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
 ; X64_1LD-NEXT:    br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
 ; X64_1LD:       loadbb1:
@@ -1007,8 +1007,8 @@ define i32 @cmp_eq11(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; X64_1LD-NEXT:    [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 3
 ; X64_1LD-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i64*
 ; X64_1LD-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i64*
-; X64_1LD-NEXT:    [[TMP10:%.*]] = load i64, i64* [[TMP8]]
-; X64_1LD-NEXT:    [[TMP11:%.*]] = load i64, i64* [[TMP9]]
+; X64_1LD-NEXT:    [[TMP10:%.*]] = load i64, i64* [[TMP8]], align 1
+; X64_1LD-NEXT:    [[TMP11:%.*]] = load i64, i64* [[TMP9]], align 1
 ; X64_1LD-NEXT:    [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]]
 ; X64_1LD-NEXT:    br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
 ; X64_1LD:       endblock:
@@ -1020,15 +1020,15 @@ define i32 @cmp_eq11(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; X64_2LD-LABEL: @cmp_eq11(
 ; X64_2LD-NEXT:    [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
 ; X64_2LD-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64_2LD-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64_2LD-NEXT:    [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64_2LD-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
+; X64_2LD-NEXT:    [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
 ; X64_2LD-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
 ; X64_2LD-NEXT:    [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 3
 ; X64_2LD-NEXT:    [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 3
 ; X64_2LD-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i64*
 ; X64_2LD-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i64*
-; X64_2LD-NEXT:    [[TMP10:%.*]] = load i64, i64* [[TMP8]]
-; X64_2LD-NEXT:    [[TMP11:%.*]] = load i64, i64* [[TMP9]]
+; X64_2LD-NEXT:    [[TMP10:%.*]] = load i64, i64* [[TMP8]], align 1
+; X64_2LD-NEXT:    [[TMP11:%.*]] = load i64, i64* [[TMP9]], align 1
 ; X64_2LD-NEXT:    [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]
 ; X64_2LD-NEXT:    [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]
 ; X64_2LD-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
@@ -1057,8 +1057,8 @@ define i32 @cmp_eq12(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; X64_1LD:       loadbb:
 ; X64_1LD-NEXT:    [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
 ; X64_1LD-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64_1LD-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64_1LD-NEXT:    [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64_1LD-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
+; X64_1LD-NEXT:    [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
 ; X64_1LD-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
 ; X64_1LD-NEXT:    br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
 ; X64_1LD:       loadbb1:
@@ -1066,8 +1066,8 @@ define i32 @cmp_eq12(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; X64_1LD-NEXT:    [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 8
 ; X64_1LD-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i32*
 ; X64_1LD-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i32*
-; X64_1LD-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP8]]
-; X64_1LD-NEXT:    [[TMP11:%.*]] = load i32, i32* [[TMP9]]
+; X64_1LD-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP8]], align 1
+; X64_1LD-NEXT:    [[TMP11:%.*]] = load i32, i32* [[TMP9]], align 1
 ; X64_1LD-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP10]], [[TMP11]]
 ; X64_1LD-NEXT:    br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
 ; X64_1LD:       endblock:
@@ -1079,15 +1079,15 @@ define i32 @cmp_eq12(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; X64_2LD-LABEL: @cmp_eq12(
 ; X64_2LD-NEXT:    [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
 ; X64_2LD-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64_2LD-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64_2LD-NEXT:    [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64_2LD-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
+; X64_2LD-NEXT:    [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
 ; X64_2LD-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
 ; X64_2LD-NEXT:    [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 8
 ; X64_2LD-NEXT:    [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 8
 ; X64_2LD-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i32*
 ; X64_2LD-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i32*
-; X64_2LD-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP8]]
-; X64_2LD-NEXT:    [[TMP11:%.*]] = load i32, i32* [[TMP9]]
+; X64_2LD-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP8]], align 1
+; X64_2LD-NEXT:    [[TMP11:%.*]] = load i32, i32* [[TMP9]], align 1
 ; X64_2LD-NEXT:    [[TMP12:%.*]] = zext i32 [[TMP10]] to i64
 ; X64_2LD-NEXT:    [[TMP13:%.*]] = zext i32 [[TMP11]] to i64
 ; X64_2LD-NEXT:    [[TMP14:%.*]] = xor i64 [[TMP12]], [[TMP13]]
@@ -1118,8 +1118,8 @@ define i32 @cmp_eq13(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; X64_1LD:       loadbb:
 ; X64_1LD-NEXT:    [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
 ; X64_1LD-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64_1LD-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64_1LD-NEXT:    [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64_1LD-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
+; X64_1LD-NEXT:    [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
 ; X64_1LD-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
 ; X64_1LD-NEXT:    br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
 ; X64_1LD:       loadbb1:
@@ -1127,8 +1127,8 @@ define i32 @cmp_eq13(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; X64_1LD-NEXT:    [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 5
 ; X64_1LD-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i64*
 ; X64_1LD-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i64*
-; X64_1LD-NEXT:    [[TMP10:%.*]] = load i64, i64* [[TMP8]]
-; X64_1LD-NEXT:    [[TMP11:%.*]] = load i64, i64* [[TMP9]]
+; X64_1LD-NEXT:    [[TMP10:%.*]] = load i64, i64* [[TMP8]], align 1
+; X64_1LD-NEXT:    [[TMP11:%.*]] = load i64, i64* [[TMP9]], align 1
 ; X64_1LD-NEXT:    [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]]
 ; X64_1LD-NEXT:    br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
 ; X64_1LD:       endblock:
@@ -1140,15 +1140,15 @@ define i32 @cmp_eq13(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; X64_2LD-LABEL: @cmp_eq13(
 ; X64_2LD-NEXT:    [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
 ; X64_2LD-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64_2LD-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64_2LD-NEXT:    [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64_2LD-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
+; X64_2LD-NEXT:    [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
 ; X64_2LD-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
 ; X64_2LD-NEXT:    [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 5
 ; X64_2LD-NEXT:    [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 5
 ; X64_2LD-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i64*
 ; X64_2LD-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i64*
-; X64_2LD-NEXT:    [[TMP10:%.*]] = load i64, i64* [[TMP8]]
-; X64_2LD-NEXT:    [[TMP11:%.*]] = load i64, i64* [[TMP9]]
+; X64_2LD-NEXT:    [[TMP10:%.*]] = load i64, i64* [[TMP8]], align 1
+; X64_2LD-NEXT:    [[TMP11:%.*]] = load i64, i64* [[TMP9]], align 1
 ; X64_2LD-NEXT:    [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]
 ; X64_2LD-NEXT:    [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]
 ; X64_2LD-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
@@ -1177,8 +1177,8 @@ define i32 @cmp_eq14(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; X64_1LD:       loadbb:
 ; X64_1LD-NEXT:    [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
 ; X64_1LD-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64_1LD-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64_1LD-NEXT:    [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64_1LD-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
+; X64_1LD-NEXT:    [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
 ; X64_1LD-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
 ; X64_1LD-NEXT:    br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
 ; X64_1LD:       loadbb1:
@@ -1186,8 +1186,8 @@ define i32 @cmp_eq14(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; X64_1LD-NEXT:    [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 6
 ; X64_1LD-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i64*
 ; X64_1LD-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i64*
-; X64_1LD-NEXT:    [[TMP10:%.*]] = load i64, i64* [[TMP8]]
-; X64_1LD-NEXT:    [[TMP11:%.*]] = load i64, i64* [[TMP9]]
+; X64_1LD-NEXT:    [[TMP10:%.*]] = load i64, i64* [[TMP8]], align 1
+; X64_1LD-NEXT:    [[TMP11:%.*]] = load i64, i64* [[TMP9]], align 1
 ; X64_1LD-NEXT:    [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]]
 ; X64_1LD-NEXT:    br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
 ; X64_1LD:       endblock:
@@ -1199,15 +1199,15 @@ define i32 @cmp_eq14(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; X64_2LD-LABEL: @cmp_eq14(
 ; X64_2LD-NEXT:    [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
 ; X64_2LD-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64_2LD-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64_2LD-NEXT:    [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64_2LD-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
+; X64_2LD-NEXT:    [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
 ; X64_2LD-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
 ; X64_2LD-NEXT:    [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 6
 ; X64_2LD-NEXT:    [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 6
 ; X64_2LD-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i64*
 ; X64_2LD-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i64*
-; X64_2LD-NEXT:    [[TMP10:%.*]] = load i64, i64* [[TMP8]]
-; X64_2LD-NEXT:    [[TMP11:%.*]] = load i64, i64* [[TMP9]]
+; X64_2LD-NEXT:    [[TMP10:%.*]] = load i64, i64* [[TMP8]], align 1
+; X64_2LD-NEXT:    [[TMP11:%.*]] = load i64, i64* [[TMP9]], align 1
 ; X64_2LD-NEXT:    [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]
 ; X64_2LD-NEXT:    [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]
 ; X64_2LD-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
@@ -1236,8 +1236,8 @@ define i32 @cmp_eq15(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; X64_1LD:       loadbb:
 ; X64_1LD-NEXT:    [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
 ; X64_1LD-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64_1LD-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64_1LD-NEXT:    [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64_1LD-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
+; X64_1LD-NEXT:    [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
 ; X64_1LD-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
 ; X64_1LD-NEXT:    br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
 ; X64_1LD:       loadbb1:
@@ -1245,8 +1245,8 @@ define i32 @cmp_eq15(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; X64_1LD-NEXT:    [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 7
 ; X64_1LD-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i64*
 ; X64_1LD-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i64*
-; X64_1LD-NEXT:    [[TMP10:%.*]] = load i64, i64* [[TMP8]]
-; X64_1LD-NEXT:    [[TMP11:%.*]] = load i64, i64* [[TMP9]]
+; X64_1LD-NEXT:    [[TMP10:%.*]] = load i64, i64* [[TMP8]], align 1
+; X64_1LD-NEXT:    [[TMP11:%.*]] = load i64, i64* [[TMP9]], align 1
 ; X64_1LD-NEXT:    [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]]
 ; X64_1LD-NEXT:    br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
 ; X64_1LD:       endblock:
@@ -1258,15 +1258,15 @@ define i32 @cmp_eq15(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; X64_2LD-LABEL: @cmp_eq15(
 ; X64_2LD-NEXT:    [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
 ; X64_2LD-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64_2LD-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64_2LD-NEXT:    [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64_2LD-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
+; X64_2LD-NEXT:    [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
 ; X64_2LD-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
 ; X64_2LD-NEXT:    [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 7
 ; X64_2LD-NEXT:    [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 7
 ; X64_2LD-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i64*
 ; X64_2LD-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i64*
-; X64_2LD-NEXT:    [[TMP10:%.*]] = load i64, i64* [[TMP8]]
-; X64_2LD-NEXT:    [[TMP11:%.*]] = load i64, i64* [[TMP9]]
+; X64_2LD-NEXT:    [[TMP10:%.*]] = load i64, i64* [[TMP8]], align 1
+; X64_2LD-NEXT:    [[TMP11:%.*]] = load i64, i64* [[TMP9]], align 1
 ; X64_2LD-NEXT:    [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]
 ; X64_2LD-NEXT:    [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]
 ; X64_2LD-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
@@ -1291,8 +1291,8 @@ define i32 @cmp_eq16(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; X64-LABEL: @cmp_eq16(
 ; X64-NEXT:    [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i128*
 ; X64-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i128*
-; X64-NEXT:    [[TMP3:%.*]] = load i128, i128* [[TMP1]]
-; X64-NEXT:    [[TMP4:%.*]] = load i128, i128* [[TMP2]]
+; X64-NEXT:    [[TMP3:%.*]] = load i128, i128* [[TMP1]], align 1
+; X64-NEXT:    [[TMP4:%.*]] = load i128, i128* [[TMP2]], align 1
 ; X64-NEXT:    [[TMP5:%.*]] = icmp ne i128 [[TMP3]], [[TMP4]]
 ; X64-NEXT:    [[TMP6:%.*]] = zext i1 [[TMP5]] to i32
 ; X64-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0


        


More information about the llvm-commits mailing list