[llvm] [ExpandMemCmp] Improve memcmp optimisation for boolean results (PR #71221)

Igor Kirillov via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 6 04:48:59 PST 2023


https://github.com/igogo-x86 updated https://github.com/llvm/llvm-project/pull/71221

>From 4848ed2255f875c86d1a8590c5a681bddf224428 Mon Sep 17 00:00:00 2001
From: Igor Kirillov <igor.kirillov at arm.com>
Date: Fri, 3 Nov 2023 18:49:19 +0000
Subject: [PATCH 1/3] [ExpandMemCmp] Improve memcmp optimisation for boolean
 results

  This patch enhances the optimization of memcmp calls when only two outcomes
are needed and the comparison fits into one block, for example:

	bool result = memcmp(a, b, 6) > 0;

  Previously, LLVM would generate unnecessary operations even when the user of
memcmp was only interested in a binary outcome.
---
 llvm/lib/CodeGen/ExpandMemCmp.cpp             |  45 +-
 llvm/test/CodeGen/AArch64/memcmp.ll           | 392 ++++++++++--------
 .../CodeGen/X86/memcmp-more-load-pairs-x32.ll |  18 +-
 .../CodeGen/X86/memcmp-more-load-pairs.ll     |  20 +-
 llvm/test/CodeGen/X86/memcmp-x32.ll           |  18 +-
 llvm/test/CodeGen/X86/memcmp.ll               |  20 +-
 6 files changed, 269 insertions(+), 244 deletions(-)

diff --git a/llvm/lib/CodeGen/ExpandMemCmp.cpp b/llvm/lib/CodeGen/ExpandMemCmp.cpp
index 28e258be226a695..a3dd0feea2ff969 100644
--- a/llvm/lib/CodeGen/ExpandMemCmp.cpp
+++ b/llvm/lib/CodeGen/ExpandMemCmp.cpp
@@ -23,6 +23,7 @@
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/PatternMatch.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -31,6 +32,7 @@
 #include <optional>
 
 using namespace llvm;
+using namespace llvm::PatternMatch;
 
 namespace llvm {
 class TargetLowering;
@@ -656,6 +658,37 @@ Value *MemCmpExpansion::getMemCmpOneBlock() {
 
   const LoadPair Loads = getLoadPair(LoadSizeType, BSwapSizeType, MaxLoadType,
                                      /*Offset*/ 0);
+
+  // If a user of memcmp cares only about two outcomes, for example:
+  //    bool result = memcmp(a, b, NBYTES) > 0;
+  // we can generate more optimal code with a smaller number of operations.
+  if (auto *U = CI->getUniqueUndroppableUser()) {
+    auto *UI = cast<Instruction>(U);
+    ICmpInst::Predicate Pred = ICmpInst::Predicate::BAD_ICMP_PREDICATE;
+    uint64_t Shift;
+    bool NeedsZExt = false;
+    // Special case: rather than testing whether the result is less than zero:
+    //    bool result = memcmp(a, b, NBYTES) < 0;
+    // the compiler is clever enough to generate the following code instead:
+    //    bool result = memcmp(a, b, NBYTES) >> 31;
+    if (match(UI, m_LShr(m_Value(), m_ConstantInt(Shift))) && Shift == CI->getType()->getIntegerBitWidth() - 1) {
+      Pred = ICmpInst::ICMP_SLT;
+      NeedsZExt = true;
+    } else {
+      // On a successful match this call sets the `Pred` variable.
+      match(UI, m_ICmp(Pred, m_Specific(CI), m_Zero()));
+    }
+    // Generate new code and remove the original memcmp call and the user
+    if (ICmpInst::isSigned(Pred)) {
+      Value *Cmp = Builder.CreateICmp(CmpInst::getUnsignedPredicate(Pred), Loads.Lhs, Loads.Rhs);
+      auto *Result = NeedsZExt ? Builder.CreateZExt(Cmp, U->getType()) : Cmp;
+      UI->replaceAllUsesWith(Result);
+      UI->eraseFromParent();
+      CI->eraseFromParent();
+      return nullptr;
+    }
+  }
+
   // The result of memcmp is negative, zero, or positive, so produce that by
   // subtracting 2 extended compare bits: sub (ugt, ult).
   // If a target prefers to use selects to get -1/0/1, they should be able
@@ -670,7 +703,7 @@ Value *MemCmpExpansion::getMemCmpOneBlock() {
 }
 
 // This function expands the memcmp call into an inline expansion and returns
-// the memcmp result.
+// the memcmp result. Returns nullptr if the memcmp is already replaced.
 Value *MemCmpExpansion::getMemCmpExpansion() {
   // Create the basic block framework for a multi-block expansion.
   if (getNumBlocks() != 1) {
@@ -838,11 +871,11 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
 
   NumMemCmpInlined++;
 
-  Value *Res = Expansion.getMemCmpExpansion();
-
-  // Replace call with result of expansion and erase call.
-  CI->replaceAllUsesWith(Res);
-  CI->eraseFromParent();
+  if (Value *Res = Expansion.getMemCmpExpansion()) {
+    // Replace call with result of expansion and erase call.
+    CI->replaceAllUsesWith(Res);
+    CI->eraseFromParent();
+  }
 
   return true;
 }
diff --git a/llvm/test/CodeGen/AArch64/memcmp.ll b/llvm/test/CodeGen/AArch64/memcmp.ll
index d13a416a28761ca..4da7c8c95a4e4f0 100644
--- a/llvm/test/CodeGen/AArch64/memcmp.ll
+++ b/llvm/test/CodeGen/AArch64/memcmp.ll
@@ -222,16 +222,28 @@ define i1 @length4_lt(ptr %X, ptr %Y) nounwind {
 ; CHECK-NEXT:    rev w8, w8
 ; CHECK-NEXT:    rev w9, w9
 ; CHECK-NEXT:    cmp w8, w9
-; CHECK-NEXT:    cset w8, hi
-; CHECK-NEXT:    cset w9, lo
-; CHECK-NEXT:    sub w8, w8, w9
-; CHECK-NEXT:    lsr w0, w8, #31
+; CHECK-NEXT:    cset w0, lo
 ; CHECK-NEXT:    ret
   %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
   %c = icmp slt i32 %m, 0
   ret i1 %c
 }
 
+define i32 @length4_lt_32(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: length4_lt_32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr w8, [x0]
+; CHECK-NEXT:    ldr w9, [x1]
+; CHECK-NEXT:    rev w8, w8
+; CHECK-NEXT:    rev w9, w9
+; CHECK-NEXT:    cmp w8, w9
+; CHECK-NEXT:    cset w0, lo
+; CHECK-NEXT:    ret
+  %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
+  %c = lshr i32 %m, 31
+  ret i32 %c
+}
+
 define i1 @length4_gt(ptr %X, ptr %Y) nounwind {
 ; CHECK-LABEL: length4_gt:
 ; CHECK:       // %bb.0:
@@ -240,11 +252,7 @@ define i1 @length4_gt(ptr %X, ptr %Y) nounwind {
 ; CHECK-NEXT:    rev w8, w8
 ; CHECK-NEXT:    rev w9, w9
 ; CHECK-NEXT:    cmp w8, w9
-; CHECK-NEXT:    cset w8, hi
-; CHECK-NEXT:    cset w9, lo
-; CHECK-NEXT:    sub w8, w8, w9
-; CHECK-NEXT:    cmp w8, #0
-; CHECK-NEXT:    cset w0, gt
+; CHECK-NEXT:    cset w0, hi
 ; CHECK-NEXT:    ret
   %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
   %c = icmp sgt i32 %m, 0
@@ -313,10 +321,7 @@ define i1 @length5_lt(ptr %X, ptr %Y) nounwind {
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    cset w8, hi
-; CHECK-NEXT:    cset w9, lo
-; CHECK-NEXT:    sub w8, w8, w9
-; CHECK-NEXT:    lsr w0, w8, #31
+; CHECK-NEXT:    cset w0, lo
 ; CHECK-NEXT:    ret
   %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind
   %c = icmp slt i32 %m, 0
@@ -343,6 +348,25 @@ define i32 @length6(ptr %X, ptr %Y) nounwind {
   ret i32 %m
 }
 
+define i32 @length6_lt(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: length6_lt:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldrh w8, [x0, #4]
+; CHECK-NEXT:    ldr w9, [x0]
+; CHECK-NEXT:    ldrh w10, [x1, #4]
+; CHECK-NEXT:    ldr w11, [x1]
+; CHECK-NEXT:    orr x8, x9, x8, lsl #32
+; CHECK-NEXT:    orr x9, x11, x10, lsl #32
+; CHECK-NEXT:    rev x8, x8
+; CHECK-NEXT:    rev x9, x9
+; CHECK-NEXT:    cmp x8, x9
+; CHECK-NEXT:    cset w0, lo
+; CHECK-NEXT:    ret
+  %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 6) nounwind
+  %r = lshr i32 %m, 31
+  ret i32 %r
+}
+
 define i32 @length7(ptr %X, ptr %Y) nounwind {
 ; CHECK-LABEL: length7:
 ; CHECK:       // %bb.0:
@@ -351,18 +375,18 @@ define i32 @length7(ptr %X, ptr %Y) nounwind {
 ; CHECK-NEXT:    rev w8, w8
 ; CHECK-NEXT:    rev w9, w9
 ; CHECK-NEXT:    cmp w8, w9
-; CHECK-NEXT:    b.ne .LBB22_3
+; CHECK-NEXT:    b.ne .LBB24_3
 ; CHECK-NEXT:  // %bb.1: // %loadbb1
 ; CHECK-NEXT:    ldur w8, [x0, #3]
 ; CHECK-NEXT:    ldur w9, [x1, #3]
 ; CHECK-NEXT:    rev w8, w8
 ; CHECK-NEXT:    rev w9, w9
 ; CHECK-NEXT:    cmp w8, w9
-; CHECK-NEXT:    b.ne .LBB22_3
+; CHECK-NEXT:    b.ne .LBB24_3
 ; CHECK-NEXT:  // %bb.2:
 ; CHECK-NEXT:    mov w0, wzr
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB22_3: // %res_block
+; CHECK-NEXT:  .LBB24_3: // %res_block
 ; CHECK-NEXT:    cmp w8, w9
 ; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
 ; CHECK-NEXT:    cneg w0, w8, hs
@@ -379,18 +403,18 @@ define i1 @length7_lt(ptr %X, ptr %Y) nounwind {
 ; CHECK-NEXT:    rev w8, w8
 ; CHECK-NEXT:    rev w9, w9
 ; CHECK-NEXT:    cmp w8, w9
-; CHECK-NEXT:    b.ne .LBB23_3
+; CHECK-NEXT:    b.ne .LBB25_3
 ; CHECK-NEXT:  // %bb.1: // %loadbb1
 ; CHECK-NEXT:    ldur w8, [x0, #3]
 ; CHECK-NEXT:    ldur w9, [x1, #3]
 ; CHECK-NEXT:    rev w8, w8
 ; CHECK-NEXT:    rev w9, w9
 ; CHECK-NEXT:    cmp w8, w9
-; CHECK-NEXT:    b.ne .LBB23_3
+; CHECK-NEXT:    b.ne .LBB25_3
 ; CHECK-NEXT:  // %bb.2:
 ; CHECK-NEXT:    lsr w0, wzr, #31
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB23_3: // %res_block
+; CHECK-NEXT:  .LBB25_3: // %res_block
 ; CHECK-NEXT:    cmp w8, w9
 ; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
 ; CHECK-NEXT:    cneg w8, w8, hs
@@ -470,13 +494,13 @@ define i32 @length9(ptr %X, ptr %Y) nounwind {
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB28_2
+; CHECK-NEXT:    b.ne .LBB30_2
 ; CHECK-NEXT:  // %bb.1: // %loadbb1
 ; CHECK-NEXT:    ldrb w8, [x0, #8]
 ; CHECK-NEXT:    ldrb w9, [x1, #8]
 ; CHECK-NEXT:    sub w0, w8, w9
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB28_2: // %res_block
+; CHECK-NEXT:  .LBB30_2: // %res_block
 ; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
 ; CHECK-NEXT:    cneg w0, w8, hs
 ; CHECK-NEXT:    ret
@@ -508,7 +532,7 @@ define i32 @length10(ptr %X, ptr %Y) nounwind {
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB30_3
+; CHECK-NEXT:    b.ne .LBB32_3
 ; CHECK-NEXT:  // %bb.1: // %loadbb1
 ; CHECK-NEXT:    ldrh w8, [x0, #8]
 ; CHECK-NEXT:    ldrh w9, [x1, #8]
@@ -517,11 +541,11 @@ define i32 @length10(ptr %X, ptr %Y) nounwind {
 ; CHECK-NEXT:    lsr w8, w8, #16
 ; CHECK-NEXT:    lsr w9, w9, #16
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB30_3
+; CHECK-NEXT:    b.ne .LBB32_3
 ; CHECK-NEXT:  // %bb.2:
 ; CHECK-NEXT:    mov w0, wzr
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB30_3: // %res_block
+; CHECK-NEXT:  .LBB32_3: // %res_block
 ; CHECK-NEXT:    cmp x8, x9
 ; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
 ; CHECK-NEXT:    cneg w0, w8, hs
@@ -554,18 +578,18 @@ define i32 @length11(ptr %X, ptr %Y) nounwind {
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB32_3
+; CHECK-NEXT:    b.ne .LBB34_3
 ; CHECK-NEXT:  // %bb.1: // %loadbb1
 ; CHECK-NEXT:    ldur x8, [x0, #3]
 ; CHECK-NEXT:    ldur x9, [x1, #3]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB32_3
+; CHECK-NEXT:    b.ne .LBB34_3
 ; CHECK-NEXT:  // %bb.2:
 ; CHECK-NEXT:    mov w0, wzr
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB32_3: // %res_block
+; CHECK-NEXT:  .LBB34_3: // %res_block
 ; CHECK-NEXT:    cmp x8, x9
 ; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
 ; CHECK-NEXT:    cneg w0, w8, hs
@@ -614,18 +638,18 @@ define i32 @length12(ptr %X, ptr %Y) nounwind {
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB35_3
+; CHECK-NEXT:    b.ne .LBB37_3
 ; CHECK-NEXT:  // %bb.1: // %loadbb1
 ; CHECK-NEXT:    ldr w8, [x0, #8]
 ; CHECK-NEXT:    ldr w9, [x1, #8]
 ; CHECK-NEXT:    rev w8, w8
 ; CHECK-NEXT:    rev w9, w9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB35_3
+; CHECK-NEXT:    b.ne .LBB37_3
 ; CHECK-NEXT:  // %bb.2:
 ; CHECK-NEXT:    mov w0, wzr
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB35_3: // %res_block
+; CHECK-NEXT:  .LBB37_3: // %res_block
 ; CHECK-NEXT:    cmp x8, x9
 ; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
 ; CHECK-NEXT:    cneg w0, w8, hs
@@ -674,18 +698,18 @@ define i32 @length15(ptr %X, ptr %Y) nounwind {
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB38_3
+; CHECK-NEXT:    b.ne .LBB40_3
 ; CHECK-NEXT:  // %bb.1: // %loadbb1
 ; CHECK-NEXT:    ldur x8, [x0, #7]
 ; CHECK-NEXT:    ldur x9, [x1, #7]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB38_3
+; CHECK-NEXT:    b.ne .LBB40_3
 ; CHECK-NEXT:  // %bb.2:
 ; CHECK-NEXT:    mov w0, wzr
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB38_3: // %res_block
+; CHECK-NEXT:  .LBB40_3: // %res_block
 ; CHECK-NEXT:    cmp x8, x9
 ; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
 ; CHECK-NEXT:    cneg w0, w8, hs
@@ -702,18 +726,18 @@ define i1 @length15_lt(ptr %X, ptr %Y) nounwind {
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB39_3
+; CHECK-NEXT:    b.ne .LBB41_3
 ; CHECK-NEXT:  // %bb.1: // %loadbb1
 ; CHECK-NEXT:    ldur x8, [x0, #7]
 ; CHECK-NEXT:    ldur x9, [x1, #7]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB39_3
+; CHECK-NEXT:    b.ne .LBB41_3
 ; CHECK-NEXT:  // %bb.2:
 ; CHECK-NEXT:    lsr w0, wzr, #31
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB39_3: // %res_block
+; CHECK-NEXT:  .LBB41_3: // %res_block
 ; CHECK-NEXT:    cmp x8, x9
 ; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
 ; CHECK-NEXT:    cneg w8, w8, hs
@@ -734,7 +758,7 @@ define i32 @length15_const(ptr %X, ptr %Y) nounwind {
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    movk x8, #12594, lsl #48
 ; CHECK-NEXT:    cmp x9, x8
-; CHECK-NEXT:    b.ne .LBB40_3
+; CHECK-NEXT:    b.ne .LBB42_3
 ; CHECK-NEXT:  // %bb.1: // %loadbb1
 ; CHECK-NEXT:    mov x8, #13365 // =0x3435
 ; CHECK-NEXT:    ldur x9, [x0, #7]
@@ -743,11 +767,11 @@ define i32 @length15_const(ptr %X, ptr %Y) nounwind {
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    movk x8, #14393, lsl #48
 ; CHECK-NEXT:    cmp x9, x8
-; CHECK-NEXT:    b.ne .LBB40_3
+; CHECK-NEXT:    b.ne .LBB42_3
 ; CHECK-NEXT:  // %bb.2:
 ; CHECK-NEXT:    mov w0, wzr
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB40_3: // %res_block
+; CHECK-NEXT:  .LBB42_3: // %res_block
 ; CHECK-NEXT:    cmp x9, x8
 ; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
 ; CHECK-NEXT:    cneg w0, w8, hs
@@ -782,7 +806,7 @@ define i1 @length15_gt_const(ptr %X, ptr %Y) nounwind {
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    movk x8, #12594, lsl #48
 ; CHECK-NEXT:    cmp x9, x8
-; CHECK-NEXT:    b.ne .LBB42_3
+; CHECK-NEXT:    b.ne .LBB44_3
 ; CHECK-NEXT:  // %bb.1: // %loadbb1
 ; CHECK-NEXT:    mov x8, #13365 // =0x3435
 ; CHECK-NEXT:    ldur x9, [x0, #7]
@@ -791,15 +815,15 @@ define i1 @length15_gt_const(ptr %X, ptr %Y) nounwind {
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    movk x8, #14393, lsl #48
 ; CHECK-NEXT:    cmp x9, x8
-; CHECK-NEXT:    b.ne .LBB42_3
+; CHECK-NEXT:    b.ne .LBB44_3
 ; CHECK-NEXT:  // %bb.2:
 ; CHECK-NEXT:    mov w8, wzr
-; CHECK-NEXT:    b .LBB42_4
-; CHECK-NEXT:  .LBB42_3: // %res_block
+; CHECK-NEXT:    b .LBB44_4
+; CHECK-NEXT:  .LBB44_3: // %res_block
 ; CHECK-NEXT:    cmp x9, x8
 ; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
 ; CHECK-NEXT:    cneg w8, w8, hs
-; CHECK-NEXT:  .LBB42_4: // %endblock
+; CHECK-NEXT:  .LBB44_4: // %endblock
 ; CHECK-NEXT:    cmp w8, #0
 ; CHECK-NEXT:    cset w0, gt
 ; CHECK-NEXT:    ret
@@ -817,18 +841,18 @@ define i32 @length16(ptr %X, ptr %Y) nounwind {
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB43_3
+; CHECK-NEXT:    b.ne .LBB45_3
 ; CHECK-NEXT:  // %bb.1: // %loadbb1
 ; CHECK-NEXT:    ldr x8, [x0, #8]
 ; CHECK-NEXT:    ldr x9, [x1, #8]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB43_3
+; CHECK-NEXT:    b.ne .LBB45_3
 ; CHECK-NEXT:  // %bb.2:
 ; CHECK-NEXT:    mov w0, wzr
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB43_3: // %res_block
+; CHECK-NEXT:  .LBB45_3: // %res_block
 ; CHECK-NEXT:    cmp x8, x9
 ; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
 ; CHECK-NEXT:    cneg w0, w8, hs
@@ -859,18 +883,18 @@ define i1 @length16_lt(ptr %x, ptr %y) nounwind {
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB45_3
+; CHECK-NEXT:    b.ne .LBB47_3
 ; CHECK-NEXT:  // %bb.1: // %loadbb1
 ; CHECK-NEXT:    ldr x8, [x0, #8]
 ; CHECK-NEXT:    ldr x9, [x1, #8]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB45_3
+; CHECK-NEXT:    b.ne .LBB47_3
 ; CHECK-NEXT:  // %bb.2:
 ; CHECK-NEXT:    lsr w0, wzr, #31
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB45_3: // %res_block
+; CHECK-NEXT:  .LBB47_3: // %res_block
 ; CHECK-NEXT:    cmp x8, x9
 ; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
 ; CHECK-NEXT:    cneg w8, w8, hs
@@ -889,22 +913,22 @@ define i1 @length16_gt(ptr %x, ptr %y) nounwind {
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB46_3
+; CHECK-NEXT:    b.ne .LBB48_3
 ; CHECK-NEXT:  // %bb.1: // %loadbb1
 ; CHECK-NEXT:    ldr x8, [x0, #8]
 ; CHECK-NEXT:    ldr x9, [x1, #8]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB46_3
+; CHECK-NEXT:    b.ne .LBB48_3
 ; CHECK-NEXT:  // %bb.2:
 ; CHECK-NEXT:    mov w8, wzr
-; CHECK-NEXT:    b .LBB46_4
-; CHECK-NEXT:  .LBB46_3: // %res_block
+; CHECK-NEXT:    b .LBB48_4
+; CHECK-NEXT:  .LBB48_3: // %res_block
 ; CHECK-NEXT:    cmp x8, x9
 ; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
 ; CHECK-NEXT:    cneg w8, w8, hs
-; CHECK-NEXT:  .LBB46_4: // %endblock
+; CHECK-NEXT:  .LBB48_4: // %endblock
 ; CHECK-NEXT:    cmp w8, #0
 ; CHECK-NEXT:    cset w0, gt
 ; CHECK-NEXT:    ret
@@ -943,25 +967,25 @@ define i32 @length24(ptr %X, ptr %Y) nounwind {
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB48_4
+; CHECK-NEXT:    b.ne .LBB50_4
 ; CHECK-NEXT:  // %bb.1: // %loadbb1
 ; CHECK-NEXT:    ldr x8, [x0, #8]
 ; CHECK-NEXT:    ldr x9, [x1, #8]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB48_4
+; CHECK-NEXT:    b.ne .LBB50_4
 ; CHECK-NEXT:  // %bb.2: // %loadbb2
 ; CHECK-NEXT:    ldr x8, [x0, #16]
 ; CHECK-NEXT:    ldr x9, [x1, #16]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB48_4
+; CHECK-NEXT:    b.ne .LBB50_4
 ; CHECK-NEXT:  // %bb.3:
 ; CHECK-NEXT:    mov w0, wzr
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB48_4: // %res_block
+; CHECK-NEXT:  .LBB50_4: // %res_block
 ; CHECK-NEXT:    cmp x8, x9
 ; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
 ; CHECK-NEXT:    cneg w0, w8, hs
@@ -995,25 +1019,25 @@ define i1 @length24_lt(ptr %x, ptr %y) nounwind {
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB50_4
+; CHECK-NEXT:    b.ne .LBB52_4
 ; CHECK-NEXT:  // %bb.1: // %loadbb1
 ; CHECK-NEXT:    ldr x8, [x0, #8]
 ; CHECK-NEXT:    ldr x9, [x1, #8]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB50_4
+; CHECK-NEXT:    b.ne .LBB52_4
 ; CHECK-NEXT:  // %bb.2: // %loadbb2
 ; CHECK-NEXT:    ldr x8, [x0, #16]
 ; CHECK-NEXT:    ldr x9, [x1, #16]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB50_4
+; CHECK-NEXT:    b.ne .LBB52_4
 ; CHECK-NEXT:  // %bb.3:
 ; CHECK-NEXT:    lsr w0, wzr, #31
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB50_4: // %res_block
+; CHECK-NEXT:  .LBB52_4: // %res_block
 ; CHECK-NEXT:    cmp x8, x9
 ; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
 ; CHECK-NEXT:    cneg w8, w8, hs
@@ -1032,29 +1056,29 @@ define i1 @length24_gt(ptr %x, ptr %y) nounwind {
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB51_4
+; CHECK-NEXT:    b.ne .LBB53_4
 ; CHECK-NEXT:  // %bb.1: // %loadbb1
 ; CHECK-NEXT:    ldr x8, [x0, #8]
 ; CHECK-NEXT:    ldr x9, [x1, #8]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB51_4
+; CHECK-NEXT:    b.ne .LBB53_4
 ; CHECK-NEXT:  // %bb.2: // %loadbb2
 ; CHECK-NEXT:    ldr x8, [x0, #16]
 ; CHECK-NEXT:    ldr x9, [x1, #16]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB51_4
+; CHECK-NEXT:    b.ne .LBB53_4
 ; CHECK-NEXT:  // %bb.3:
 ; CHECK-NEXT:    mov w8, wzr
-; CHECK-NEXT:    b .LBB51_5
-; CHECK-NEXT:  .LBB51_4: // %res_block
+; CHECK-NEXT:    b .LBB53_5
+; CHECK-NEXT:  .LBB53_4: // %res_block
 ; CHECK-NEXT:    cmp x8, x9
 ; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
 ; CHECK-NEXT:    cneg w8, w8, hs
-; CHECK-NEXT:  .LBB51_5: // %endblock
+; CHECK-NEXT:  .LBB53_5: // %endblock
 ; CHECK-NEXT:    cmp w8, #0
 ; CHECK-NEXT:    cset w0, gt
 ; CHECK-NEXT:    ret
@@ -1098,32 +1122,32 @@ define i32 @length31(ptr %X, ptr %Y) nounwind {
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB53_5
+; CHECK-NEXT:    b.ne .LBB55_5
 ; CHECK-NEXT:  // %bb.1: // %loadbb1
 ; CHECK-NEXT:    ldr x8, [x0, #8]
 ; CHECK-NEXT:    ldr x9, [x1, #8]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB53_5
+; CHECK-NEXT:    b.ne .LBB55_5
 ; CHECK-NEXT:  // %bb.2: // %loadbb2
 ; CHECK-NEXT:    ldr x8, [x0, #16]
 ; CHECK-NEXT:    ldr x9, [x1, #16]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB53_5
+; CHECK-NEXT:    b.ne .LBB55_5
 ; CHECK-NEXT:  // %bb.3: // %loadbb3
 ; CHECK-NEXT:    ldur x8, [x0, #23]
 ; CHECK-NEXT:    ldur x9, [x1, #23]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB53_5
+; CHECK-NEXT:    b.ne .LBB55_5
 ; CHECK-NEXT:  // %bb.4:
 ; CHECK-NEXT:    mov w0, wzr
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB53_5: // %res_block
+; CHECK-NEXT:  .LBB55_5: // %res_block
 ; CHECK-NEXT:    cmp x8, x9
 ; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
 ; CHECK-NEXT:    cneg w0, w8, hs
@@ -1160,32 +1184,32 @@ define i1 @length31_lt(ptr %x, ptr %y) nounwind {
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB55_5
+; CHECK-NEXT:    b.ne .LBB57_5
 ; CHECK-NEXT:  // %bb.1: // %loadbb1
 ; CHECK-NEXT:    ldr x8, [x0, #8]
 ; CHECK-NEXT:    ldr x9, [x1, #8]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB55_5
+; CHECK-NEXT:    b.ne .LBB57_5
 ; CHECK-NEXT:  // %bb.2: // %loadbb2
 ; CHECK-NEXT:    ldr x8, [x0, #16]
 ; CHECK-NEXT:    ldr x9, [x1, #16]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB55_5
+; CHECK-NEXT:    b.ne .LBB57_5
 ; CHECK-NEXT:  // %bb.3: // %loadbb3
 ; CHECK-NEXT:    ldur x8, [x0, #23]
 ; CHECK-NEXT:    ldur x9, [x1, #23]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB55_5
+; CHECK-NEXT:    b.ne .LBB57_5
 ; CHECK-NEXT:  // %bb.4:
 ; CHECK-NEXT:    lsr w0, wzr, #31
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB55_5: // %res_block
+; CHECK-NEXT:  .LBB57_5: // %res_block
 ; CHECK-NEXT:    cmp x8, x9
 ; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
 ; CHECK-NEXT:    cneg w8, w8, hs
@@ -1204,36 +1228,36 @@ define i1 @length31_gt(ptr %x, ptr %y) nounwind {
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB56_5
+; CHECK-NEXT:    b.ne .LBB58_5
 ; CHECK-NEXT:  // %bb.1: // %loadbb1
 ; CHECK-NEXT:    ldr x8, [x0, #8]
 ; CHECK-NEXT:    ldr x9, [x1, #8]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB56_5
+; CHECK-NEXT:    b.ne .LBB58_5
 ; CHECK-NEXT:  // %bb.2: // %loadbb2
 ; CHECK-NEXT:    ldr x8, [x0, #16]
 ; CHECK-NEXT:    ldr x9, [x1, #16]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB56_5
+; CHECK-NEXT:    b.ne .LBB58_5
 ; CHECK-NEXT:  // %bb.3: // %loadbb3
 ; CHECK-NEXT:    ldur x8, [x0, #23]
 ; CHECK-NEXT:    ldur x9, [x1, #23]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB56_5
+; CHECK-NEXT:    b.ne .LBB58_5
 ; CHECK-NEXT:  // %bb.4:
 ; CHECK-NEXT:    mov w8, wzr
-; CHECK-NEXT:    b .LBB56_6
-; CHECK-NEXT:  .LBB56_5: // %res_block
+; CHECK-NEXT:    b .LBB58_6
+; CHECK-NEXT:  .LBB58_5: // %res_block
 ; CHECK-NEXT:    cmp x8, x9
 ; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
 ; CHECK-NEXT:    cneg w8, w8, hs
-; CHECK-NEXT:  .LBB56_6: // %endblock
+; CHECK-NEXT:  .LBB58_6: // %endblock
 ; CHECK-NEXT:    cmp w8, #0
 ; CHECK-NEXT:    cset w0, gt
 ; CHECK-NEXT:    ret
@@ -1303,32 +1327,32 @@ define i32 @length32(ptr %X, ptr %Y) nounwind {
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB59_5
+; CHECK-NEXT:    b.ne .LBB61_5
 ; CHECK-NEXT:  // %bb.1: // %loadbb1
 ; CHECK-NEXT:    ldr x8, [x0, #8]
 ; CHECK-NEXT:    ldr x9, [x1, #8]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB59_5
+; CHECK-NEXT:    b.ne .LBB61_5
 ; CHECK-NEXT:  // %bb.2: // %loadbb2
 ; CHECK-NEXT:    ldr x8, [x0, #16]
 ; CHECK-NEXT:    ldr x9, [x1, #16]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB59_5
+; CHECK-NEXT:    b.ne .LBB61_5
 ; CHECK-NEXT:  // %bb.3: // %loadbb3
 ; CHECK-NEXT:    ldr x8, [x0, #24]
 ; CHECK-NEXT:    ldr x9, [x1, #24]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB59_5
+; CHECK-NEXT:    b.ne .LBB61_5
 ; CHECK-NEXT:  // %bb.4:
 ; CHECK-NEXT:    mov w0, wzr
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB59_5: // %res_block
+; CHECK-NEXT:  .LBB61_5: // %res_block
 ; CHECK-NEXT:    cmp x8, x9
 ; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
 ; CHECK-NEXT:    cneg w0, w8, hs
@@ -1364,32 +1388,32 @@ define i1 @length32_lt(ptr %x, ptr %y) nounwind {
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB61_5
+; CHECK-NEXT:    b.ne .LBB63_5
 ; CHECK-NEXT:  // %bb.1: // %loadbb1
 ; CHECK-NEXT:    ldr x8, [x0, #8]
 ; CHECK-NEXT:    ldr x9, [x1, #8]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB61_5
+; CHECK-NEXT:    b.ne .LBB63_5
 ; CHECK-NEXT:  // %bb.2: // %loadbb2
 ; CHECK-NEXT:    ldr x8, [x0, #16]
 ; CHECK-NEXT:    ldr x9, [x1, #16]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB61_5
+; CHECK-NEXT:    b.ne .LBB63_5
 ; CHECK-NEXT:  // %bb.3: // %loadbb3
 ; CHECK-NEXT:    ldr x8, [x0, #24]
 ; CHECK-NEXT:    ldr x9, [x1, #24]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB61_5
+; CHECK-NEXT:    b.ne .LBB63_5
 ; CHECK-NEXT:  // %bb.4:
 ; CHECK-NEXT:    lsr w0, wzr, #31
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB61_5: // %res_block
+; CHECK-NEXT:  .LBB63_5: // %res_block
 ; CHECK-NEXT:    cmp x8, x9
 ; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
 ; CHECK-NEXT:    cneg w8, w8, hs
@@ -1408,36 +1432,36 @@ define i1 @length32_gt(ptr %x, ptr %y) nounwind {
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB62_5
+; CHECK-NEXT:    b.ne .LBB64_5
 ; CHECK-NEXT:  // %bb.1: // %loadbb1
 ; CHECK-NEXT:    ldr x8, [x0, #8]
 ; CHECK-NEXT:    ldr x9, [x1, #8]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB62_5
+; CHECK-NEXT:    b.ne .LBB64_5
 ; CHECK-NEXT:  // %bb.2: // %loadbb2
 ; CHECK-NEXT:    ldr x8, [x0, #16]
 ; CHECK-NEXT:    ldr x9, [x1, #16]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB62_5
+; CHECK-NEXT:    b.ne .LBB64_5
 ; CHECK-NEXT:  // %bb.3: // %loadbb3
 ; CHECK-NEXT:    ldr x8, [x0, #24]
 ; CHECK-NEXT:    ldr x9, [x1, #24]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB62_5
+; CHECK-NEXT:    b.ne .LBB64_5
 ; CHECK-NEXT:  // %bb.4:
 ; CHECK-NEXT:    mov w8, wzr
-; CHECK-NEXT:    b .LBB62_6
-; CHECK-NEXT:  .LBB62_5: // %res_block
+; CHECK-NEXT:    b .LBB64_6
+; CHECK-NEXT:  .LBB64_5: // %res_block
 ; CHECK-NEXT:    cmp x8, x9
 ; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
 ; CHECK-NEXT:    cneg w8, w8, hs
-; CHECK-NEXT:  .LBB62_6: // %endblock
+; CHECK-NEXT:  .LBB64_6: // %endblock
 ; CHECK-NEXT:    cmp w8, #0
 ; CHECK-NEXT:    cset w0, gt
 ; CHECK-NEXT:    ret
@@ -1504,46 +1528,46 @@ define i32 @length48(ptr %X, ptr %Y) nounwind {
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB65_7
+; CHECK-NEXT:    b.ne .LBB67_7
 ; CHECK-NEXT:  // %bb.1: // %loadbb1
 ; CHECK-NEXT:    ldr x8, [x0, #8]
 ; CHECK-NEXT:    ldr x9, [x1, #8]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB65_7
+; CHECK-NEXT:    b.ne .LBB67_7
 ; CHECK-NEXT:  // %bb.2: // %loadbb2
 ; CHECK-NEXT:    ldr x8, [x0, #16]
 ; CHECK-NEXT:    ldr x9, [x1, #16]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB65_7
+; CHECK-NEXT:    b.ne .LBB67_7
 ; CHECK-NEXT:  // %bb.3: // %loadbb3
 ; CHECK-NEXT:    ldr x8, [x0, #24]
 ; CHECK-NEXT:    ldr x9, [x1, #24]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB65_7
+; CHECK-NEXT:    b.ne .LBB67_7
 ; CHECK-NEXT:  // %bb.4: // %loadbb4
 ; CHECK-NEXT:    ldr x8, [x0, #32]
 ; CHECK-NEXT:    ldr x9, [x1, #32]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB65_7
+; CHECK-NEXT:    b.ne .LBB67_7
 ; CHECK-NEXT:  // %bb.5: // %loadbb5
 ; CHECK-NEXT:    ldr x8, [x0, #40]
 ; CHECK-NEXT:    ldr x9, [x1, #40]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB65_7
+; CHECK-NEXT:    b.ne .LBB67_7
 ; CHECK-NEXT:  // %bb.6:
 ; CHECK-NEXT:    mov w0, wzr
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB65_7: // %res_block
+; CHECK-NEXT:  .LBB67_7: // %res_block
 ; CHECK-NEXT:    cmp x8, x9
 ; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
 ; CHECK-NEXT:    cneg w0, w8, hs
@@ -1582,46 +1606,46 @@ define i1 @length48_lt(ptr %x, ptr %y) nounwind {
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB67_7
+; CHECK-NEXT:    b.ne .LBB69_7
 ; CHECK-NEXT:  // %bb.1: // %loadbb1
 ; CHECK-NEXT:    ldr x8, [x0, #8]
 ; CHECK-NEXT:    ldr x9, [x1, #8]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB67_7
+; CHECK-NEXT:    b.ne .LBB69_7
 ; CHECK-NEXT:  // %bb.2: // %loadbb2
 ; CHECK-NEXT:    ldr x8, [x0, #16]
 ; CHECK-NEXT:    ldr x9, [x1, #16]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB67_7
+; CHECK-NEXT:    b.ne .LBB69_7
 ; CHECK-NEXT:  // %bb.3: // %loadbb3
 ; CHECK-NEXT:    ldr x8, [x0, #24]
 ; CHECK-NEXT:    ldr x9, [x1, #24]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB67_7
+; CHECK-NEXT:    b.ne .LBB69_7
 ; CHECK-NEXT:  // %bb.4: // %loadbb4
 ; CHECK-NEXT:    ldr x8, [x0, #32]
 ; CHECK-NEXT:    ldr x9, [x1, #32]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB67_7
+; CHECK-NEXT:    b.ne .LBB69_7
 ; CHECK-NEXT:  // %bb.5: // %loadbb5
 ; CHECK-NEXT:    ldr x8, [x0, #40]
 ; CHECK-NEXT:    ldr x9, [x1, #40]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB67_7
+; CHECK-NEXT:    b.ne .LBB69_7
 ; CHECK-NEXT:  // %bb.6:
 ; CHECK-NEXT:    lsr w0, wzr, #31
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB67_7: // %res_block
+; CHECK-NEXT:  .LBB69_7: // %res_block
 ; CHECK-NEXT:    cmp x8, x9
 ; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
 ; CHECK-NEXT:    cneg w8, w8, hs
@@ -1640,50 +1664,50 @@ define i1 @length48_gt(ptr %x, ptr %y) nounwind {
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB68_7
+; CHECK-NEXT:    b.ne .LBB70_7
 ; CHECK-NEXT:  // %bb.1: // %loadbb1
 ; CHECK-NEXT:    ldr x8, [x0, #8]
 ; CHECK-NEXT:    ldr x9, [x1, #8]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB68_7
+; CHECK-NEXT:    b.ne .LBB70_7
 ; CHECK-NEXT:  // %bb.2: // %loadbb2
 ; CHECK-NEXT:    ldr x8, [x0, #16]
 ; CHECK-NEXT:    ldr x9, [x1, #16]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB68_7
+; CHECK-NEXT:    b.ne .LBB70_7
 ; CHECK-NEXT:  // %bb.3: // %loadbb3
 ; CHECK-NEXT:    ldr x8, [x0, #24]
 ; CHECK-NEXT:    ldr x9, [x1, #24]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB68_7
+; CHECK-NEXT:    b.ne .LBB70_7
 ; CHECK-NEXT:  // %bb.4: // %loadbb4
 ; CHECK-NEXT:    ldr x8, [x0, #32]
 ; CHECK-NEXT:    ldr x9, [x1, #32]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB68_7
+; CHECK-NEXT:    b.ne .LBB70_7
 ; CHECK-NEXT:  // %bb.5: // %loadbb5
 ; CHECK-NEXT:    ldr x8, [x0, #40]
 ; CHECK-NEXT:    ldr x9, [x1, #40]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB68_7
+; CHECK-NEXT:    b.ne .LBB70_7
 ; CHECK-NEXT:  // %bb.6:
 ; CHECK-NEXT:    mov w8, wzr
-; CHECK-NEXT:    b .LBB68_8
-; CHECK-NEXT:  .LBB68_7: // %res_block
+; CHECK-NEXT:    b .LBB70_8
+; CHECK-NEXT:  .LBB70_7: // %res_block
 ; CHECK-NEXT:    cmp x8, x9
 ; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
 ; CHECK-NEXT:    cneg w8, w8, hs
-; CHECK-NEXT:  .LBB68_8: // %endblock
+; CHECK-NEXT:  .LBB70_8: // %endblock
 ; CHECK-NEXT:    cmp w8, #0
 ; CHECK-NEXT:    cset w0, gt
 ; CHECK-NEXT:    ret
@@ -1761,60 +1785,60 @@ define i32 @length63(ptr %X, ptr %Y) nounwind {
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB71_9
+; CHECK-NEXT:    b.ne .LBB73_9
 ; CHECK-NEXT:  // %bb.1: // %loadbb1
 ; CHECK-NEXT:    ldr x8, [x0, #8]
 ; CHECK-NEXT:    ldr x9, [x1, #8]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB71_9
+; CHECK-NEXT:    b.ne .LBB73_9
 ; CHECK-NEXT:  // %bb.2: // %loadbb2
 ; CHECK-NEXT:    ldr x8, [x0, #16]
 ; CHECK-NEXT:    ldr x9, [x1, #16]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB71_9
+; CHECK-NEXT:    b.ne .LBB73_9
 ; CHECK-NEXT:  // %bb.3: // %loadbb3
 ; CHECK-NEXT:    ldr x8, [x0, #24]
 ; CHECK-NEXT:    ldr x9, [x1, #24]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB71_9
+; CHECK-NEXT:    b.ne .LBB73_9
 ; CHECK-NEXT:  // %bb.4: // %loadbb4
 ; CHECK-NEXT:    ldr x8, [x0, #32]
 ; CHECK-NEXT:    ldr x9, [x1, #32]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB71_9
+; CHECK-NEXT:    b.ne .LBB73_9
 ; CHECK-NEXT:  // %bb.5: // %loadbb5
 ; CHECK-NEXT:    ldr x8, [x0, #40]
 ; CHECK-NEXT:    ldr x9, [x1, #40]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB71_9
+; CHECK-NEXT:    b.ne .LBB73_9
 ; CHECK-NEXT:  // %bb.6: // %loadbb6
 ; CHECK-NEXT:    ldr x8, [x0, #48]
 ; CHECK-NEXT:    ldr x9, [x1, #48]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB71_9
+; CHECK-NEXT:    b.ne .LBB73_9
 ; CHECK-NEXT:  // %bb.7: // %loadbb7
 ; CHECK-NEXT:    ldur x8, [x0, #55]
 ; CHECK-NEXT:    ldur x9, [x1, #55]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB71_9
+; CHECK-NEXT:    b.ne .LBB73_9
 ; CHECK-NEXT:  // %bb.8:
 ; CHECK-NEXT:    mov w0, wzr
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB71_9: // %res_block
+; CHECK-NEXT:  .LBB73_9: // %res_block
 ; CHECK-NEXT:    cmp x8, x9
 ; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
 ; CHECK-NEXT:    cneg w0, w8, hs
@@ -1859,60 +1883,60 @@ define i1 @length63_lt(ptr %x, ptr %y) nounwind {
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB73_9
+; CHECK-NEXT:    b.ne .LBB75_9
 ; CHECK-NEXT:  // %bb.1: // %loadbb1
 ; CHECK-NEXT:    ldr x8, [x0, #8]
 ; CHECK-NEXT:    ldr x9, [x1, #8]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB73_9
+; CHECK-NEXT:    b.ne .LBB75_9
 ; CHECK-NEXT:  // %bb.2: // %loadbb2
 ; CHECK-NEXT:    ldr x8, [x0, #16]
 ; CHECK-NEXT:    ldr x9, [x1, #16]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB73_9
+; CHECK-NEXT:    b.ne .LBB75_9
 ; CHECK-NEXT:  // %bb.3: // %loadbb3
 ; CHECK-NEXT:    ldr x8, [x0, #24]
 ; CHECK-NEXT:    ldr x9, [x1, #24]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB73_9
+; CHECK-NEXT:    b.ne .LBB75_9
 ; CHECK-NEXT:  // %bb.4: // %loadbb4
 ; CHECK-NEXT:    ldr x8, [x0, #32]
 ; CHECK-NEXT:    ldr x9, [x1, #32]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB73_9
+; CHECK-NEXT:    b.ne .LBB75_9
 ; CHECK-NEXT:  // %bb.5: // %loadbb5
 ; CHECK-NEXT:    ldr x8, [x0, #40]
 ; CHECK-NEXT:    ldr x9, [x1, #40]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB73_9
+; CHECK-NEXT:    b.ne .LBB75_9
 ; CHECK-NEXT:  // %bb.6: // %loadbb6
 ; CHECK-NEXT:    ldr x8, [x0, #48]
 ; CHECK-NEXT:    ldr x9, [x1, #48]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB73_9
+; CHECK-NEXT:    b.ne .LBB75_9
 ; CHECK-NEXT:  // %bb.7: // %loadbb7
 ; CHECK-NEXT:    ldur x8, [x0, #55]
 ; CHECK-NEXT:    ldur x9, [x1, #55]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB73_9
+; CHECK-NEXT:    b.ne .LBB75_9
 ; CHECK-NEXT:  // %bb.8:
 ; CHECK-NEXT:    lsr w0, wzr, #31
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB73_9: // %res_block
+; CHECK-NEXT:  .LBB75_9: // %res_block
 ; CHECK-NEXT:    cmp x8, x9
 ; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
 ; CHECK-NEXT:    cneg w8, w8, hs
@@ -1931,64 +1955,64 @@ define i1 @length63_gt(ptr %x, ptr %y) nounwind {
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB74_9
+; CHECK-NEXT:    b.ne .LBB76_9
 ; CHECK-NEXT:  // %bb.1: // %loadbb1
 ; CHECK-NEXT:    ldr x8, [x0, #8]
 ; CHECK-NEXT:    ldr x9, [x1, #8]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB74_9
+; CHECK-NEXT:    b.ne .LBB76_9
 ; CHECK-NEXT:  // %bb.2: // %loadbb2
 ; CHECK-NEXT:    ldr x8, [x0, #16]
 ; CHECK-NEXT:    ldr x9, [x1, #16]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB74_9
+; CHECK-NEXT:    b.ne .LBB76_9
 ; CHECK-NEXT:  // %bb.3: // %loadbb3
 ; CHECK-NEXT:    ldr x8, [x0, #24]
 ; CHECK-NEXT:    ldr x9, [x1, #24]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB74_9
+; CHECK-NEXT:    b.ne .LBB76_9
 ; CHECK-NEXT:  // %bb.4: // %loadbb4
 ; CHECK-NEXT:    ldr x8, [x0, #32]
 ; CHECK-NEXT:    ldr x9, [x1, #32]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB74_9
+; CHECK-NEXT:    b.ne .LBB76_9
 ; CHECK-NEXT:  // %bb.5: // %loadbb5
 ; CHECK-NEXT:    ldr x8, [x0, #40]
 ; CHECK-NEXT:    ldr x9, [x1, #40]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB74_9
+; CHECK-NEXT:    b.ne .LBB76_9
 ; CHECK-NEXT:  // %bb.6: // %loadbb6
 ; CHECK-NEXT:    ldr x8, [x0, #48]
 ; CHECK-NEXT:    ldr x9, [x1, #48]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB74_9
+; CHECK-NEXT:    b.ne .LBB76_9
 ; CHECK-NEXT:  // %bb.7: // %loadbb7
 ; CHECK-NEXT:    ldur x8, [x0, #55]
 ; CHECK-NEXT:    ldur x9, [x1, #55]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB74_9
+; CHECK-NEXT:    b.ne .LBB76_9
 ; CHECK-NEXT:  // %bb.8:
 ; CHECK-NEXT:    mov w8, wzr
-; CHECK-NEXT:    b .LBB74_10
-; CHECK-NEXT:  .LBB74_9: // %res_block
+; CHECK-NEXT:    b .LBB76_10
+; CHECK-NEXT:  .LBB76_9: // %res_block
 ; CHECK-NEXT:    cmp x8, x9
 ; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
 ; CHECK-NEXT:    cneg w8, w8, hs
-; CHECK-NEXT:  .LBB74_10: // %endblock
+; CHECK-NEXT:  .LBB76_10: // %endblock
 ; CHECK-NEXT:    cmp w8, #0
 ; CHECK-NEXT:    cset w0, gt
 ; CHECK-NEXT:    ret
@@ -2052,60 +2076,60 @@ define i32 @length64(ptr %X, ptr %Y) nounwind {
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB76_9
+; CHECK-NEXT:    b.ne .LBB78_9
 ; CHECK-NEXT:  // %bb.1: // %loadbb1
 ; CHECK-NEXT:    ldr x8, [x0, #8]
 ; CHECK-NEXT:    ldr x9, [x1, #8]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB76_9
+; CHECK-NEXT:    b.ne .LBB78_9
 ; CHECK-NEXT:  // %bb.2: // %loadbb2
 ; CHECK-NEXT:    ldr x8, [x0, #16]
 ; CHECK-NEXT:    ldr x9, [x1, #16]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB76_9
+; CHECK-NEXT:    b.ne .LBB78_9
 ; CHECK-NEXT:  // %bb.3: // %loadbb3
 ; CHECK-NEXT:    ldr x8, [x0, #24]
 ; CHECK-NEXT:    ldr x9, [x1, #24]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB76_9
+; CHECK-NEXT:    b.ne .LBB78_9
 ; CHECK-NEXT:  // %bb.4: // %loadbb4
 ; CHECK-NEXT:    ldr x8, [x0, #32]
 ; CHECK-NEXT:    ldr x9, [x1, #32]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB76_9
+; CHECK-NEXT:    b.ne .LBB78_9
 ; CHECK-NEXT:  // %bb.5: // %loadbb5
 ; CHECK-NEXT:    ldr x8, [x0, #40]
 ; CHECK-NEXT:    ldr x9, [x1, #40]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB76_9
+; CHECK-NEXT:    b.ne .LBB78_9
 ; CHECK-NEXT:  // %bb.6: // %loadbb6
 ; CHECK-NEXT:    ldr x8, [x0, #48]
 ; CHECK-NEXT:    ldr x9, [x1, #48]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB76_9
+; CHECK-NEXT:    b.ne .LBB78_9
 ; CHECK-NEXT:  // %bb.7: // %loadbb7
 ; CHECK-NEXT:    ldr x8, [x0, #56]
 ; CHECK-NEXT:    ldr x9, [x1, #56]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB76_9
+; CHECK-NEXT:    b.ne .LBB78_9
 ; CHECK-NEXT:  // %bb.8:
 ; CHECK-NEXT:    mov w0, wzr
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB76_9: // %res_block
+; CHECK-NEXT:  .LBB78_9: // %res_block
 ; CHECK-NEXT:    cmp x8, x9
 ; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
 ; CHECK-NEXT:    cneg w0, w8, hs
@@ -2148,60 +2172,60 @@ define i1 @length64_lt(ptr %x, ptr %y) nounwind {
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB78_9
+; CHECK-NEXT:    b.ne .LBB80_9
 ; CHECK-NEXT:  // %bb.1: // %loadbb1
 ; CHECK-NEXT:    ldr x8, [x0, #8]
 ; CHECK-NEXT:    ldr x9, [x1, #8]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB78_9
+; CHECK-NEXT:    b.ne .LBB80_9
 ; CHECK-NEXT:  // %bb.2: // %loadbb2
 ; CHECK-NEXT:    ldr x8, [x0, #16]
 ; CHECK-NEXT:    ldr x9, [x1, #16]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB78_9
+; CHECK-NEXT:    b.ne .LBB80_9
 ; CHECK-NEXT:  // %bb.3: // %loadbb3
 ; CHECK-NEXT:    ldr x8, [x0, #24]
 ; CHECK-NEXT:    ldr x9, [x1, #24]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB78_9
+; CHECK-NEXT:    b.ne .LBB80_9
 ; CHECK-NEXT:  // %bb.4: // %loadbb4
 ; CHECK-NEXT:    ldr x8, [x0, #32]
 ; CHECK-NEXT:    ldr x9, [x1, #32]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB78_9
+; CHECK-NEXT:    b.ne .LBB80_9
 ; CHECK-NEXT:  // %bb.5: // %loadbb5
 ; CHECK-NEXT:    ldr x8, [x0, #40]
 ; CHECK-NEXT:    ldr x9, [x1, #40]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB78_9
+; CHECK-NEXT:    b.ne .LBB80_9
 ; CHECK-NEXT:  // %bb.6: // %loadbb6
 ; CHECK-NEXT:    ldr x8, [x0, #48]
 ; CHECK-NEXT:    ldr x9, [x1, #48]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB78_9
+; CHECK-NEXT:    b.ne .LBB80_9
 ; CHECK-NEXT:  // %bb.7: // %loadbb7
 ; CHECK-NEXT:    ldr x8, [x0, #56]
 ; CHECK-NEXT:    ldr x9, [x1, #56]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB78_9
+; CHECK-NEXT:    b.ne .LBB80_9
 ; CHECK-NEXT:  // %bb.8:
 ; CHECK-NEXT:    lsr w0, wzr, #31
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB78_9: // %res_block
+; CHECK-NEXT:  .LBB80_9: // %res_block
 ; CHECK-NEXT:    cmp x8, x9
 ; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
 ; CHECK-NEXT:    cneg w8, w8, hs
@@ -2220,64 +2244,64 @@ define i1 @length64_gt(ptr %x, ptr %y) nounwind {
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB79_9
+; CHECK-NEXT:    b.ne .LBB81_9
 ; CHECK-NEXT:  // %bb.1: // %loadbb1
 ; CHECK-NEXT:    ldr x8, [x0, #8]
 ; CHECK-NEXT:    ldr x9, [x1, #8]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB79_9
+; CHECK-NEXT:    b.ne .LBB81_9
 ; CHECK-NEXT:  // %bb.2: // %loadbb2
 ; CHECK-NEXT:    ldr x8, [x0, #16]
 ; CHECK-NEXT:    ldr x9, [x1, #16]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB79_9
+; CHECK-NEXT:    b.ne .LBB81_9
 ; CHECK-NEXT:  // %bb.3: // %loadbb3
 ; CHECK-NEXT:    ldr x8, [x0, #24]
 ; CHECK-NEXT:    ldr x9, [x1, #24]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB79_9
+; CHECK-NEXT:    b.ne .LBB81_9
 ; CHECK-NEXT:  // %bb.4: // %loadbb4
 ; CHECK-NEXT:    ldr x8, [x0, #32]
 ; CHECK-NEXT:    ldr x9, [x1, #32]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB79_9
+; CHECK-NEXT:    b.ne .LBB81_9
 ; CHECK-NEXT:  // %bb.5: // %loadbb5
 ; CHECK-NEXT:    ldr x8, [x0, #40]
 ; CHECK-NEXT:    ldr x9, [x1, #40]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB79_9
+; CHECK-NEXT:    b.ne .LBB81_9
 ; CHECK-NEXT:  // %bb.6: // %loadbb6
 ; CHECK-NEXT:    ldr x8, [x0, #48]
 ; CHECK-NEXT:    ldr x9, [x1, #48]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB79_9
+; CHECK-NEXT:    b.ne .LBB81_9
 ; CHECK-NEXT:  // %bb.7: // %loadbb7
 ; CHECK-NEXT:    ldr x8, [x0, #56]
 ; CHECK-NEXT:    ldr x9, [x1, #56]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB79_9
+; CHECK-NEXT:    b.ne .LBB81_9
 ; CHECK-NEXT:  // %bb.8:
 ; CHECK-NEXT:    mov w8, wzr
-; CHECK-NEXT:    b .LBB79_10
-; CHECK-NEXT:  .LBB79_9: // %res_block
+; CHECK-NEXT:    b .LBB81_10
+; CHECK-NEXT:  .LBB81_9: // %res_block
 ; CHECK-NEXT:    cmp x8, x9
 ; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
 ; CHECK-NEXT:    cneg w8, w8, hs
-; CHECK-NEXT:  .LBB79_10: // %endblock
+; CHECK-NEXT:  .LBB81_10: // %endblock
 ; CHECK-NEXT:    cmp w8, #0
 ; CHECK-NEXT:    cset w0, gt
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll b/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll
index c0f8f86e6e8b107..0253d131226083a 100644
--- a/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll
+++ b/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll
@@ -226,15 +226,11 @@ define i1 @length4_lt(ptr %X, ptr %Y) nounwind {
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl (%ecx), %ecx
-; X86-NEXT:    movl (%eax), %edx
+; X86-NEXT:    movl (%eax), %eax
 ; X86-NEXT:    bswapl %ecx
-; X86-NEXT:    bswapl %edx
-; X86-NEXT:    xorl %eax, %eax
-; X86-NEXT:    cmpl %edx, %ecx
-; X86-NEXT:    seta %al
-; X86-NEXT:    sbbl $0, %eax
-; X86-NEXT:    shrl $31, %eax
-; X86-NEXT:    # kill: def $al killed $al killed $eax
+; X86-NEXT:    bswapl %eax
+; X86-NEXT:    cmpl %eax, %ecx
+; X86-NEXT:    setb %al
 ; X86-NEXT:    retl
   %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind
   %c = icmp slt i32 %m, 0
@@ -250,12 +246,8 @@ define i1 @length4_gt(ptr %X, ptr %Y) nounwind {
 ; X86-NEXT:    movl (%eax), %eax
 ; X86-NEXT:    bswapl %ecx
 ; X86-NEXT:    bswapl %eax
-; X86-NEXT:    xorl %edx, %edx
 ; X86-NEXT:    cmpl %eax, %ecx
-; X86-NEXT:    seta %dl
-; X86-NEXT:    sbbl $0, %edx
-; X86-NEXT:    testl %edx, %edx
-; X86-NEXT:    setg %al
+; X86-NEXT:    seta %al
 ; X86-NEXT:    retl
   %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind
   %c = icmp sgt i32 %m, 0
diff --git a/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll b/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll
index 56d06021867fa15..6eb02bfc1fd0c39 100644
--- a/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll
+++ b/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll
@@ -208,16 +208,12 @@ define i1 @length4_eq(ptr %X, ptr %Y) nounwind {
 define i1 @length4_lt(ptr %X, ptr %Y) nounwind {
 ; X64-LABEL: length4_lt:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl (%rdi), %ecx
-; X64-NEXT:    movl (%rsi), %edx
+; X64-NEXT:    movl (%rdi), %eax
+; X64-NEXT:    movl (%rsi), %ecx
+; X64-NEXT:    bswapl %eax
 ; X64-NEXT:    bswapl %ecx
-; X64-NEXT:    bswapl %edx
-; X64-NEXT:    xorl %eax, %eax
-; X64-NEXT:    cmpl %edx, %ecx
-; X64-NEXT:    seta %al
-; X64-NEXT:    sbbl $0, %eax
-; X64-NEXT:    shrl $31, %eax
-; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    cmpl %ecx, %eax
+; X64-NEXT:    setb %al
 ; X64-NEXT:    retq
   %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
   %c = icmp slt i32 %m, 0
@@ -231,12 +227,8 @@ define i1 @length4_gt(ptr %X, ptr %Y) nounwind {
 ; X64-NEXT:    movl (%rsi), %ecx
 ; X64-NEXT:    bswapl %eax
 ; X64-NEXT:    bswapl %ecx
-; X64-NEXT:    xorl %edx, %edx
 ; X64-NEXT:    cmpl %ecx, %eax
-; X64-NEXT:    seta %dl
-; X64-NEXT:    sbbl $0, %edx
-; X64-NEXT:    testl %edx, %edx
-; X64-NEXT:    setg %al
+; X64-NEXT:    seta %al
 ; X64-NEXT:    retq
   %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
   %c = icmp sgt i32 %m, 0
diff --git a/llvm/test/CodeGen/X86/memcmp-x32.ll b/llvm/test/CodeGen/X86/memcmp-x32.ll
index ab439b32f2f1b20..a63402cea209627 100644
--- a/llvm/test/CodeGen/X86/memcmp-x32.ll
+++ b/llvm/test/CodeGen/X86/memcmp-x32.ll
@@ -254,15 +254,11 @@ define i1 @length4_lt(ptr %X, ptr %Y) nounwind {
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl (%ecx), %ecx
-; X86-NEXT:    movl (%eax), %edx
+; X86-NEXT:    movl (%eax), %eax
 ; X86-NEXT:    bswapl %ecx
-; X86-NEXT:    bswapl %edx
-; X86-NEXT:    xorl %eax, %eax
-; X86-NEXT:    cmpl %edx, %ecx
-; X86-NEXT:    seta %al
-; X86-NEXT:    sbbl $0, %eax
-; X86-NEXT:    shrl $31, %eax
-; X86-NEXT:    # kill: def $al killed $al killed $eax
+; X86-NEXT:    bswapl %eax
+; X86-NEXT:    cmpl %eax, %ecx
+; X86-NEXT:    setb %al
 ; X86-NEXT:    retl
   %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind
   %c = icmp slt i32 %m, 0
@@ -278,12 +274,8 @@ define i1 @length4_gt(ptr %X, ptr %Y) nounwind {
 ; X86-NEXT:    movl (%eax), %eax
 ; X86-NEXT:    bswapl %ecx
 ; X86-NEXT:    bswapl %eax
-; X86-NEXT:    xorl %edx, %edx
 ; X86-NEXT:    cmpl %eax, %ecx
-; X86-NEXT:    seta %dl
-; X86-NEXT:    sbbl $0, %edx
-; X86-NEXT:    testl %edx, %edx
-; X86-NEXT:    setg %al
+; X86-NEXT:    seta %al
 ; X86-NEXT:    retl
   %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind
   %c = icmp sgt i32 %m, 0
diff --git a/llvm/test/CodeGen/X86/memcmp.ll b/llvm/test/CodeGen/X86/memcmp.ll
index 1330f3a241a5c2a..f5e7384362a92b1 100644
--- a/llvm/test/CodeGen/X86/memcmp.ll
+++ b/llvm/test/CodeGen/X86/memcmp.ll
@@ -234,16 +234,12 @@ define i1 @length4_eq(ptr %X, ptr %Y) nounwind {
 define i1 @length4_lt(ptr %X, ptr %Y) nounwind {
 ; X64-LABEL: length4_lt:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl (%rdi), %ecx
-; X64-NEXT:    movl (%rsi), %edx
+; X64-NEXT:    movl (%rdi), %eax
+; X64-NEXT:    movl (%rsi), %ecx
+; X64-NEXT:    bswapl %eax
 ; X64-NEXT:    bswapl %ecx
-; X64-NEXT:    bswapl %edx
-; X64-NEXT:    xorl %eax, %eax
-; X64-NEXT:    cmpl %edx, %ecx
-; X64-NEXT:    seta %al
-; X64-NEXT:    sbbl $0, %eax
-; X64-NEXT:    shrl $31, %eax
-; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    cmpl %ecx, %eax
+; X64-NEXT:    setb %al
 ; X64-NEXT:    retq
   %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
   %c = icmp slt i32 %m, 0
@@ -257,12 +253,8 @@ define i1 @length4_gt(ptr %X, ptr %Y) nounwind {
 ; X64-NEXT:    movl (%rsi), %ecx
 ; X64-NEXT:    bswapl %eax
 ; X64-NEXT:    bswapl %ecx
-; X64-NEXT:    xorl %edx, %edx
 ; X64-NEXT:    cmpl %ecx, %eax
-; X64-NEXT:    seta %dl
-; X64-NEXT:    sbbl $0, %edx
-; X64-NEXT:    testl %edx, %edx
-; X64-NEXT:    setg %al
+; X64-NEXT:    seta %al
 ; X64-NEXT:    retq
   %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
   %c = icmp sgt i32 %m, 0

>From ee911ab53736b88a54ba9ac84d50a8ba37e2f238 Mon Sep 17 00:00:00 2001
From: Igor Kirillov <igor.kirillov at arm.com>
Date: Fri, 3 Nov 2023 19:23:21 +0000
Subject: [PATCH 2/3] Apply clang-format

---
 llvm/lib/CodeGen/ExpandMemCmp.cpp | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/CodeGen/ExpandMemCmp.cpp b/llvm/lib/CodeGen/ExpandMemCmp.cpp
index a3dd0feea2ff969..c3d73df58e46358 100644
--- a/llvm/lib/CodeGen/ExpandMemCmp.cpp
+++ b/llvm/lib/CodeGen/ExpandMemCmp.cpp
@@ -667,11 +667,13 @@ Value *MemCmpExpansion::getMemCmpOneBlock() {
     ICmpInst::Predicate Pred = ICmpInst::Predicate::BAD_ICMP_PREDICATE;
     uint64_t Shift;
     bool NeedsZExt = false;
-    // This is a special case because instead of checking if the result is less than zero:
+    // This is a special case because instead of checking if the result is less
+    // than zero:
     //    bool result = memcmp(a, b, NBYTES) < 0;
     // Compiler is clever enough to generate the following code:
     //    bool result = memcmp(a, b, NBYTES) >> 31;
-    if (match(UI, m_LShr(m_Value(), m_ConstantInt(Shift))) && Shift == CI->getType()->getIntegerBitWidth() - 1) {
+    if (match(UI, m_LShr(m_Value(), m_ConstantInt(Shift))) &&
+        Shift == CI->getType()->getIntegerBitWidth() - 1) {
       Pred = ICmpInst::ICMP_SLT;
       NeedsZExt = true;
     } else {
@@ -680,7 +682,8 @@ Value *MemCmpExpansion::getMemCmpOneBlock() {
     }
     // Generate new code and remove the original memcmp call and the user
     if (ICmpInst::isSigned(Pred)) {
-      Value *Cmp = Builder.CreateICmp(CmpInst::getUnsignedPredicate(Pred), Loads.Lhs, Loads.Rhs);
+      Value *Cmp = Builder.CreateICmp(CmpInst::getUnsignedPredicate(Pred),
+                                      Loads.Lhs, Loads.Rhs);
       auto *Result = NeedsZExt ? Builder.CreateZExt(Cmp, U->getType()) : Cmp;
       UI->replaceAllUsesWith(Result);
       UI->eraseFromParent();

>From 6974602b0d6b391f6b61eb5e3f637e549e98c223 Mon Sep 17 00:00:00 2001
From: Igor Kirillov <igor.kirillov at arm.com>
Date: Mon, 6 Nov 2023 12:46:39 +0000
Subject: [PATCH 3/3] Address the comments

---
 llvm/lib/CodeGen/ExpandMemCmp.cpp | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/CodeGen/ExpandMemCmp.cpp b/llvm/lib/CodeGen/ExpandMemCmp.cpp
index c3d73df58e46358..c98790cfeb58623 100644
--- a/llvm/lib/CodeGen/ExpandMemCmp.cpp
+++ b/llvm/lib/CodeGen/ExpandMemCmp.cpp
@@ -638,6 +638,9 @@ Value *MemCmpExpansion::getMemCmpEqZeroOneBlock() {
 
 /// A memcmp expansion that only has one block of load and compare can bypass
 /// the compare, branch, and phi IR that is required in the general case.
+/// This function also analyses the users of memcmp: if the call has a single
+/// user that distinguishes only two of the three possible memcmp outcomes
+/// (<0, ==0, >0), it generates more efficient code with a single comparison.
 Value *MemCmpExpansion::getMemCmpOneBlock() {
   bool NeedsBSwap = DL.isLittleEndian() && Size != 1;
   Type *LoadSizeType = IntegerType::get(CI->getContext(), Size * 8);
@@ -662,8 +665,8 @@ Value *MemCmpExpansion::getMemCmpOneBlock() {
   // If a user of memcmp cares only about two outcomes, for example:
   //    bool result = memcmp(a, b, NBYTES) > 0;
   // We can generate more optimal code with a smaller number of operations
-  if (auto *U = CI->getUniqueUndroppableUser()) {
-    auto *UI = cast<Instruction>(U);
+  if (CI->hasOneUser()) {
+    auto *UI = cast<Instruction>(*CI->user_begin());
     ICmpInst::Predicate Pred = ICmpInst::Predicate::BAD_ICMP_PREDICATE;
     uint64_t Shift;
     bool NeedsZExt = false;
@@ -673,7 +676,7 @@ Value *MemCmpExpansion::getMemCmpOneBlock() {
     // Compiler is clever enough to generate the following code:
     //    bool result = memcmp(a, b, NBYTES) >> 31;
     if (match(UI, m_LShr(m_Value(), m_ConstantInt(Shift))) &&
-        Shift == CI->getType()->getIntegerBitWidth() - 1) {
+        Shift == (CI->getType()->getIntegerBitWidth() - 1)) {
       Pred = ICmpInst::ICMP_SLT;
       NeedsZExt = true;
     } else {



More information about the llvm-commits mailing list