[llvm] [ExpandMemCmp] Improve memcmp optimisation for boolean results (PR #71221)
Igor Kirillov via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 6 04:48:59 PST 2023
https://github.com/igogo-x86 updated https://github.com/llvm/llvm-project/pull/71221
>From 4848ed2255f875c86d1a8590c5a681bddf224428 Mon Sep 17 00:00:00 2001
From: Igor Kirillov <igor.kirillov at arm.com>
Date: Fri, 3 Nov 2023 18:49:19 +0000
Subject: [PATCH 1/3] [ExpandMemCmp] Improve memcmp optimisation for boolean
results
This patch enhances the optimization of memcmp calls when the user needs only
two outcomes and the comparison fits into a single block, for example:
bool result = memcmp(a, b, 6) > 0;
Previously, LLVM would generate unnecessary operations (two extended compares
and a subtraction that build the full negative/zero/positive result) even when
the user of memcmp was only interested in a binary outcome.
---
llvm/lib/CodeGen/ExpandMemCmp.cpp | 45 +-
llvm/test/CodeGen/AArch64/memcmp.ll | 392 ++++++++++--------
.../CodeGen/X86/memcmp-more-load-pairs-x32.ll | 18 +-
.../CodeGen/X86/memcmp-more-load-pairs.ll | 20 +-
llvm/test/CodeGen/X86/memcmp-x32.ll | 18 +-
llvm/test/CodeGen/X86/memcmp.ll | 20 +-
6 files changed, 269 insertions(+), 244 deletions(-)
diff --git a/llvm/lib/CodeGen/ExpandMemCmp.cpp b/llvm/lib/CodeGen/ExpandMemCmp.cpp
index 28e258be226a695..a3dd0feea2ff969 100644
--- a/llvm/lib/CodeGen/ExpandMemCmp.cpp
+++ b/llvm/lib/CodeGen/ExpandMemCmp.cpp
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -31,6 +32,7 @@
#include <optional>
using namespace llvm;
+using namespace llvm::PatternMatch;
namespace llvm {
class TargetLowering;
@@ -656,6 +658,37 @@ Value *MemCmpExpansion::getMemCmpOneBlock() {
const LoadPair Loads = getLoadPair(LoadSizeType, BSwapSizeType, MaxLoadType,
/*Offset*/ 0);
+
+ // If a user of memcmp cares only about two outcomes, for example:
+ // bool result = memcmp(a, b, NBYTES) > 0;
+ // We can generate more optimal code with a smaller number of operations
+ if (auto *U = CI->getUniqueUndroppableUser()) {
+ auto *UI = cast<Instruction>(U);
+ ICmpInst::Predicate Pred = ICmpInst::Predicate::BAD_ICMP_PREDICATE;
+ uint64_t Shift;
+ bool NeedsZExt = false;
+ // This is a special case because instead of checking if the result is less than zero:
+ // bool result = memcmp(a, b, NBYTES) < 0;
+ // Compiler is clever enough to generate the following code:
+ // bool result = memcmp(a, b, NBYTES) >> 31;
+ if (match(UI, m_LShr(m_Value(), m_ConstantInt(Shift))) && Shift == CI->getType()->getIntegerBitWidth() - 1) {
+ Pred = ICmpInst::ICMP_SLT;
+ NeedsZExt = true;
+ } else {
+ // In case of a successful match this call will set `Pred` variable
+ match(UI, m_ICmp(Pred, m_Specific(CI), m_Zero()));
+ }
+ // Generate new code and remove the original memcmp call and the user
+ if (ICmpInst::isSigned(Pred)) {
+ Value *Cmp = Builder.CreateICmp(CmpInst::getUnsignedPredicate(Pred), Loads.Lhs, Loads.Rhs);
+ auto *Result = NeedsZExt ? Builder.CreateZExt(Cmp, U->getType()) : Cmp;
+ UI->replaceAllUsesWith(Result);
+ UI->eraseFromParent();
+ CI->eraseFromParent();
+ return nullptr;
+ }
+ }
+
// The result of memcmp is negative, zero, or positive, so produce that by
// subtracting 2 extended compare bits: sub (ugt, ult).
// If a target prefers to use selects to get -1/0/1, they should be able
@@ -670,7 +703,7 @@ Value *MemCmpExpansion::getMemCmpOneBlock() {
}
// This function expands the memcmp call into an inline expansion and returns
-// the memcmp result.
+// the memcmp result. Returns nullptr if the memcmp is already replaced.
Value *MemCmpExpansion::getMemCmpExpansion() {
// Create the basic block framework for a multi-block expansion.
if (getNumBlocks() != 1) {
@@ -838,11 +871,11 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
NumMemCmpInlined++;
- Value *Res = Expansion.getMemCmpExpansion();
-
- // Replace call with result of expansion and erase call.
- CI->replaceAllUsesWith(Res);
- CI->eraseFromParent();
+ if (Value *Res = Expansion.getMemCmpExpansion()) {
+ // Replace call with result of expansion and erase call.
+ CI->replaceAllUsesWith(Res);
+ CI->eraseFromParent();
+ }
return true;
}
diff --git a/llvm/test/CodeGen/AArch64/memcmp.ll b/llvm/test/CodeGen/AArch64/memcmp.ll
index d13a416a28761ca..4da7c8c95a4e4f0 100644
--- a/llvm/test/CodeGen/AArch64/memcmp.ll
+++ b/llvm/test/CodeGen/AArch64/memcmp.ll
@@ -222,16 +222,28 @@ define i1 @length4_lt(ptr %X, ptr %Y) nounwind {
; CHECK-NEXT: rev w8, w8
; CHECK-NEXT: rev w9, w9
; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: cset w8, hi
-; CHECK-NEXT: cset w9, lo
-; CHECK-NEXT: sub w8, w8, w9
-; CHECK-NEXT: lsr w0, w8, #31
+; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: ret
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
%c = icmp slt i32 %m, 0
ret i1 %c
}
+define i32 @length4_lt_32(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: length4_lt_32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr w8, [x0]
+; CHECK-NEXT: ldr w9, [x1]
+; CHECK-NEXT: rev w8, w8
+; CHECK-NEXT: rev w9, w9
+; CHECK-NEXT: cmp w8, w9
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: ret
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
+ %c = lshr i32 %m, 31
+ ret i32 %c
+}
+
define i1 @length4_gt(ptr %X, ptr %Y) nounwind {
; CHECK-LABEL: length4_gt:
; CHECK: // %bb.0:
@@ -240,11 +252,7 @@ define i1 @length4_gt(ptr %X, ptr %Y) nounwind {
; CHECK-NEXT: rev w8, w8
; CHECK-NEXT: rev w9, w9
; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: cset w8, hi
-; CHECK-NEXT: cset w9, lo
-; CHECK-NEXT: sub w8, w8, w9
-; CHECK-NEXT: cmp w8, #0
-; CHECK-NEXT: cset w0, gt
+; CHECK-NEXT: cset w0, hi
; CHECK-NEXT: ret
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
%c = icmp sgt i32 %m, 0
@@ -313,10 +321,7 @@ define i1 @length5_lt(ptr %X, ptr %Y) nounwind {
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: cset w8, hi
-; CHECK-NEXT: cset w9, lo
-; CHECK-NEXT: sub w8, w8, w9
-; CHECK-NEXT: lsr w0, w8, #31
+; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: ret
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind
%c = icmp slt i32 %m, 0
@@ -343,6 +348,25 @@ define i32 @length6(ptr %X, ptr %Y) nounwind {
ret i32 %m
}
+define i32 @length6_lt(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: length6_lt:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldrh w8, [x0, #4]
+; CHECK-NEXT: ldr w9, [x0]
+; CHECK-NEXT: ldrh w10, [x1, #4]
+; CHECK-NEXT: ldr w11, [x1]
+; CHECK-NEXT: orr x8, x9, x8, lsl #32
+; CHECK-NEXT: orr x9, x11, x10, lsl #32
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: ret
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 6) nounwind
+ %r = lshr i32 %m, 31
+ ret i32 %r
+}
+
define i32 @length7(ptr %X, ptr %Y) nounwind {
; CHECK-LABEL: length7:
; CHECK: // %bb.0:
@@ -351,18 +375,18 @@ define i32 @length7(ptr %X, ptr %Y) nounwind {
; CHECK-NEXT: rev w8, w8
; CHECK-NEXT: rev w9, w9
; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: b.ne .LBB22_3
+; CHECK-NEXT: b.ne .LBB24_3
; CHECK-NEXT: // %bb.1: // %loadbb1
; CHECK-NEXT: ldur w8, [x0, #3]
; CHECK-NEXT: ldur w9, [x1, #3]
; CHECK-NEXT: rev w8, w8
; CHECK-NEXT: rev w9, w9
; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: b.ne .LBB22_3
+; CHECK-NEXT: b.ne .LBB24_3
; CHECK-NEXT: // %bb.2:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB22_3: // %res_block
+; CHECK-NEXT: .LBB24_3: // %res_block
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: cneg w0, w8, hs
@@ -379,18 +403,18 @@ define i1 @length7_lt(ptr %X, ptr %Y) nounwind {
; CHECK-NEXT: rev w8, w8
; CHECK-NEXT: rev w9, w9
; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: b.ne .LBB23_3
+; CHECK-NEXT: b.ne .LBB25_3
; CHECK-NEXT: // %bb.1: // %loadbb1
; CHECK-NEXT: ldur w8, [x0, #3]
; CHECK-NEXT: ldur w9, [x1, #3]
; CHECK-NEXT: rev w8, w8
; CHECK-NEXT: rev w9, w9
; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: b.ne .LBB23_3
+; CHECK-NEXT: b.ne .LBB25_3
; CHECK-NEXT: // %bb.2:
; CHECK-NEXT: lsr w0, wzr, #31
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB23_3: // %res_block
+; CHECK-NEXT: .LBB25_3: // %res_block
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: cneg w8, w8, hs
@@ -470,13 +494,13 @@ define i32 @length9(ptr %X, ptr %Y) nounwind {
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB28_2
+; CHECK-NEXT: b.ne .LBB30_2
; CHECK-NEXT: // %bb.1: // %loadbb1
; CHECK-NEXT: ldrb w8, [x0, #8]
; CHECK-NEXT: ldrb w9, [x1, #8]
; CHECK-NEXT: sub w0, w8, w9
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB28_2: // %res_block
+; CHECK-NEXT: .LBB30_2: // %res_block
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: cneg w0, w8, hs
; CHECK-NEXT: ret
@@ -508,7 +532,7 @@ define i32 @length10(ptr %X, ptr %Y) nounwind {
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB30_3
+; CHECK-NEXT: b.ne .LBB32_3
; CHECK-NEXT: // %bb.1: // %loadbb1
; CHECK-NEXT: ldrh w8, [x0, #8]
; CHECK-NEXT: ldrh w9, [x1, #8]
@@ -517,11 +541,11 @@ define i32 @length10(ptr %X, ptr %Y) nounwind {
; CHECK-NEXT: lsr w8, w8, #16
; CHECK-NEXT: lsr w9, w9, #16
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB30_3
+; CHECK-NEXT: b.ne .LBB32_3
; CHECK-NEXT: // %bb.2:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB30_3: // %res_block
+; CHECK-NEXT: .LBB32_3: // %res_block
; CHECK-NEXT: cmp x8, x9
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: cneg w0, w8, hs
@@ -554,18 +578,18 @@ define i32 @length11(ptr %X, ptr %Y) nounwind {
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB32_3
+; CHECK-NEXT: b.ne .LBB34_3
; CHECK-NEXT: // %bb.1: // %loadbb1
; CHECK-NEXT: ldur x8, [x0, #3]
; CHECK-NEXT: ldur x9, [x1, #3]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB32_3
+; CHECK-NEXT: b.ne .LBB34_3
; CHECK-NEXT: // %bb.2:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB32_3: // %res_block
+; CHECK-NEXT: .LBB34_3: // %res_block
; CHECK-NEXT: cmp x8, x9
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: cneg w0, w8, hs
@@ -614,18 +638,18 @@ define i32 @length12(ptr %X, ptr %Y) nounwind {
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB35_3
+; CHECK-NEXT: b.ne .LBB37_3
; CHECK-NEXT: // %bb.1: // %loadbb1
; CHECK-NEXT: ldr w8, [x0, #8]
; CHECK-NEXT: ldr w9, [x1, #8]
; CHECK-NEXT: rev w8, w8
; CHECK-NEXT: rev w9, w9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB35_3
+; CHECK-NEXT: b.ne .LBB37_3
; CHECK-NEXT: // %bb.2:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB35_3: // %res_block
+; CHECK-NEXT: .LBB37_3: // %res_block
; CHECK-NEXT: cmp x8, x9
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: cneg w0, w8, hs
@@ -674,18 +698,18 @@ define i32 @length15(ptr %X, ptr %Y) nounwind {
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB38_3
+; CHECK-NEXT: b.ne .LBB40_3
; CHECK-NEXT: // %bb.1: // %loadbb1
; CHECK-NEXT: ldur x8, [x0, #7]
; CHECK-NEXT: ldur x9, [x1, #7]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB38_3
+; CHECK-NEXT: b.ne .LBB40_3
; CHECK-NEXT: // %bb.2:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB38_3: // %res_block
+; CHECK-NEXT: .LBB40_3: // %res_block
; CHECK-NEXT: cmp x8, x9
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: cneg w0, w8, hs
@@ -702,18 +726,18 @@ define i1 @length15_lt(ptr %X, ptr %Y) nounwind {
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB39_3
+; CHECK-NEXT: b.ne .LBB41_3
; CHECK-NEXT: // %bb.1: // %loadbb1
; CHECK-NEXT: ldur x8, [x0, #7]
; CHECK-NEXT: ldur x9, [x1, #7]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB39_3
+; CHECK-NEXT: b.ne .LBB41_3
; CHECK-NEXT: // %bb.2:
; CHECK-NEXT: lsr w0, wzr, #31
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB39_3: // %res_block
+; CHECK-NEXT: .LBB41_3: // %res_block
; CHECK-NEXT: cmp x8, x9
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: cneg w8, w8, hs
@@ -734,7 +758,7 @@ define i32 @length15_const(ptr %X, ptr %Y) nounwind {
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: movk x8, #12594, lsl #48
; CHECK-NEXT: cmp x9, x8
-; CHECK-NEXT: b.ne .LBB40_3
+; CHECK-NEXT: b.ne .LBB42_3
; CHECK-NEXT: // %bb.1: // %loadbb1
; CHECK-NEXT: mov x8, #13365 // =0x3435
; CHECK-NEXT: ldur x9, [x0, #7]
@@ -743,11 +767,11 @@ define i32 @length15_const(ptr %X, ptr %Y) nounwind {
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: movk x8, #14393, lsl #48
; CHECK-NEXT: cmp x9, x8
-; CHECK-NEXT: b.ne .LBB40_3
+; CHECK-NEXT: b.ne .LBB42_3
; CHECK-NEXT: // %bb.2:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB40_3: // %res_block
+; CHECK-NEXT: .LBB42_3: // %res_block
; CHECK-NEXT: cmp x9, x8
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: cneg w0, w8, hs
@@ -782,7 +806,7 @@ define i1 @length15_gt_const(ptr %X, ptr %Y) nounwind {
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: movk x8, #12594, lsl #48
; CHECK-NEXT: cmp x9, x8
-; CHECK-NEXT: b.ne .LBB42_3
+; CHECK-NEXT: b.ne .LBB44_3
; CHECK-NEXT: // %bb.1: // %loadbb1
; CHECK-NEXT: mov x8, #13365 // =0x3435
; CHECK-NEXT: ldur x9, [x0, #7]
@@ -791,15 +815,15 @@ define i1 @length15_gt_const(ptr %X, ptr %Y) nounwind {
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: movk x8, #14393, lsl #48
; CHECK-NEXT: cmp x9, x8
-; CHECK-NEXT: b.ne .LBB42_3
+; CHECK-NEXT: b.ne .LBB44_3
; CHECK-NEXT: // %bb.2:
; CHECK-NEXT: mov w8, wzr
-; CHECK-NEXT: b .LBB42_4
-; CHECK-NEXT: .LBB42_3: // %res_block
+; CHECK-NEXT: b .LBB44_4
+; CHECK-NEXT: .LBB44_3: // %res_block
; CHECK-NEXT: cmp x9, x8
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: cneg w8, w8, hs
-; CHECK-NEXT: .LBB42_4: // %endblock
+; CHECK-NEXT: .LBB44_4: // %endblock
; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cset w0, gt
; CHECK-NEXT: ret
@@ -817,18 +841,18 @@ define i32 @length16(ptr %X, ptr %Y) nounwind {
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB43_3
+; CHECK-NEXT: b.ne .LBB45_3
; CHECK-NEXT: // %bb.1: // %loadbb1
; CHECK-NEXT: ldr x8, [x0, #8]
; CHECK-NEXT: ldr x9, [x1, #8]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB43_3
+; CHECK-NEXT: b.ne .LBB45_3
; CHECK-NEXT: // %bb.2:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB43_3: // %res_block
+; CHECK-NEXT: .LBB45_3: // %res_block
; CHECK-NEXT: cmp x8, x9
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: cneg w0, w8, hs
@@ -859,18 +883,18 @@ define i1 @length16_lt(ptr %x, ptr %y) nounwind {
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB45_3
+; CHECK-NEXT: b.ne .LBB47_3
; CHECK-NEXT: // %bb.1: // %loadbb1
; CHECK-NEXT: ldr x8, [x0, #8]
; CHECK-NEXT: ldr x9, [x1, #8]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB45_3
+; CHECK-NEXT: b.ne .LBB47_3
; CHECK-NEXT: // %bb.2:
; CHECK-NEXT: lsr w0, wzr, #31
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB45_3: // %res_block
+; CHECK-NEXT: .LBB47_3: // %res_block
; CHECK-NEXT: cmp x8, x9
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: cneg w8, w8, hs
@@ -889,22 +913,22 @@ define i1 @length16_gt(ptr %x, ptr %y) nounwind {
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB46_3
+; CHECK-NEXT: b.ne .LBB48_3
; CHECK-NEXT: // %bb.1: // %loadbb1
; CHECK-NEXT: ldr x8, [x0, #8]
; CHECK-NEXT: ldr x9, [x1, #8]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB46_3
+; CHECK-NEXT: b.ne .LBB48_3
; CHECK-NEXT: // %bb.2:
; CHECK-NEXT: mov w8, wzr
-; CHECK-NEXT: b .LBB46_4
-; CHECK-NEXT: .LBB46_3: // %res_block
+; CHECK-NEXT: b .LBB48_4
+; CHECK-NEXT: .LBB48_3: // %res_block
; CHECK-NEXT: cmp x8, x9
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: cneg w8, w8, hs
-; CHECK-NEXT: .LBB46_4: // %endblock
+; CHECK-NEXT: .LBB48_4: // %endblock
; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cset w0, gt
; CHECK-NEXT: ret
@@ -943,25 +967,25 @@ define i32 @length24(ptr %X, ptr %Y) nounwind {
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB48_4
+; CHECK-NEXT: b.ne .LBB50_4
; CHECK-NEXT: // %bb.1: // %loadbb1
; CHECK-NEXT: ldr x8, [x0, #8]
; CHECK-NEXT: ldr x9, [x1, #8]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB48_4
+; CHECK-NEXT: b.ne .LBB50_4
; CHECK-NEXT: // %bb.2: // %loadbb2
; CHECK-NEXT: ldr x8, [x0, #16]
; CHECK-NEXT: ldr x9, [x1, #16]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB48_4
+; CHECK-NEXT: b.ne .LBB50_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB48_4: // %res_block
+; CHECK-NEXT: .LBB50_4: // %res_block
; CHECK-NEXT: cmp x8, x9
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: cneg w0, w8, hs
@@ -995,25 +1019,25 @@ define i1 @length24_lt(ptr %x, ptr %y) nounwind {
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB50_4
+; CHECK-NEXT: b.ne .LBB52_4
; CHECK-NEXT: // %bb.1: // %loadbb1
; CHECK-NEXT: ldr x8, [x0, #8]
; CHECK-NEXT: ldr x9, [x1, #8]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB50_4
+; CHECK-NEXT: b.ne .LBB52_4
; CHECK-NEXT: // %bb.2: // %loadbb2
; CHECK-NEXT: ldr x8, [x0, #16]
; CHECK-NEXT: ldr x9, [x1, #16]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB50_4
+; CHECK-NEXT: b.ne .LBB52_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: lsr w0, wzr, #31
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB50_4: // %res_block
+; CHECK-NEXT: .LBB52_4: // %res_block
; CHECK-NEXT: cmp x8, x9
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: cneg w8, w8, hs
@@ -1032,29 +1056,29 @@ define i1 @length24_gt(ptr %x, ptr %y) nounwind {
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB51_4
+; CHECK-NEXT: b.ne .LBB53_4
; CHECK-NEXT: // %bb.1: // %loadbb1
; CHECK-NEXT: ldr x8, [x0, #8]
; CHECK-NEXT: ldr x9, [x1, #8]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB51_4
+; CHECK-NEXT: b.ne .LBB53_4
; CHECK-NEXT: // %bb.2: // %loadbb2
; CHECK-NEXT: ldr x8, [x0, #16]
; CHECK-NEXT: ldr x9, [x1, #16]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB51_4
+; CHECK-NEXT: b.ne .LBB53_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: mov w8, wzr
-; CHECK-NEXT: b .LBB51_5
-; CHECK-NEXT: .LBB51_4: // %res_block
+; CHECK-NEXT: b .LBB53_5
+; CHECK-NEXT: .LBB53_4: // %res_block
; CHECK-NEXT: cmp x8, x9
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: cneg w8, w8, hs
-; CHECK-NEXT: .LBB51_5: // %endblock
+; CHECK-NEXT: .LBB53_5: // %endblock
; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cset w0, gt
; CHECK-NEXT: ret
@@ -1098,32 +1122,32 @@ define i32 @length31(ptr %X, ptr %Y) nounwind {
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB53_5
+; CHECK-NEXT: b.ne .LBB55_5
; CHECK-NEXT: // %bb.1: // %loadbb1
; CHECK-NEXT: ldr x8, [x0, #8]
; CHECK-NEXT: ldr x9, [x1, #8]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB53_5
+; CHECK-NEXT: b.ne .LBB55_5
; CHECK-NEXT: // %bb.2: // %loadbb2
; CHECK-NEXT: ldr x8, [x0, #16]
; CHECK-NEXT: ldr x9, [x1, #16]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB53_5
+; CHECK-NEXT: b.ne .LBB55_5
; CHECK-NEXT: // %bb.3: // %loadbb3
; CHECK-NEXT: ldur x8, [x0, #23]
; CHECK-NEXT: ldur x9, [x1, #23]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB53_5
+; CHECK-NEXT: b.ne .LBB55_5
; CHECK-NEXT: // %bb.4:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB53_5: // %res_block
+; CHECK-NEXT: .LBB55_5: // %res_block
; CHECK-NEXT: cmp x8, x9
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: cneg w0, w8, hs
@@ -1160,32 +1184,32 @@ define i1 @length31_lt(ptr %x, ptr %y) nounwind {
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB55_5
+; CHECK-NEXT: b.ne .LBB57_5
; CHECK-NEXT: // %bb.1: // %loadbb1
; CHECK-NEXT: ldr x8, [x0, #8]
; CHECK-NEXT: ldr x9, [x1, #8]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB55_5
+; CHECK-NEXT: b.ne .LBB57_5
; CHECK-NEXT: // %bb.2: // %loadbb2
; CHECK-NEXT: ldr x8, [x0, #16]
; CHECK-NEXT: ldr x9, [x1, #16]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB55_5
+; CHECK-NEXT: b.ne .LBB57_5
; CHECK-NEXT: // %bb.3: // %loadbb3
; CHECK-NEXT: ldur x8, [x0, #23]
; CHECK-NEXT: ldur x9, [x1, #23]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB55_5
+; CHECK-NEXT: b.ne .LBB57_5
; CHECK-NEXT: // %bb.4:
; CHECK-NEXT: lsr w0, wzr, #31
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB55_5: // %res_block
+; CHECK-NEXT: .LBB57_5: // %res_block
; CHECK-NEXT: cmp x8, x9
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: cneg w8, w8, hs
@@ -1204,36 +1228,36 @@ define i1 @length31_gt(ptr %x, ptr %y) nounwind {
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB56_5
+; CHECK-NEXT: b.ne .LBB58_5
; CHECK-NEXT: // %bb.1: // %loadbb1
; CHECK-NEXT: ldr x8, [x0, #8]
; CHECK-NEXT: ldr x9, [x1, #8]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB56_5
+; CHECK-NEXT: b.ne .LBB58_5
; CHECK-NEXT: // %bb.2: // %loadbb2
; CHECK-NEXT: ldr x8, [x0, #16]
; CHECK-NEXT: ldr x9, [x1, #16]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB56_5
+; CHECK-NEXT: b.ne .LBB58_5
; CHECK-NEXT: // %bb.3: // %loadbb3
; CHECK-NEXT: ldur x8, [x0, #23]
; CHECK-NEXT: ldur x9, [x1, #23]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB56_5
+; CHECK-NEXT: b.ne .LBB58_5
; CHECK-NEXT: // %bb.4:
; CHECK-NEXT: mov w8, wzr
-; CHECK-NEXT: b .LBB56_6
-; CHECK-NEXT: .LBB56_5: // %res_block
+; CHECK-NEXT: b .LBB58_6
+; CHECK-NEXT: .LBB58_5: // %res_block
; CHECK-NEXT: cmp x8, x9
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: cneg w8, w8, hs
-; CHECK-NEXT: .LBB56_6: // %endblock
+; CHECK-NEXT: .LBB58_6: // %endblock
; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cset w0, gt
; CHECK-NEXT: ret
@@ -1303,32 +1327,32 @@ define i32 @length32(ptr %X, ptr %Y) nounwind {
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB59_5
+; CHECK-NEXT: b.ne .LBB61_5
; CHECK-NEXT: // %bb.1: // %loadbb1
; CHECK-NEXT: ldr x8, [x0, #8]
; CHECK-NEXT: ldr x9, [x1, #8]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB59_5
+; CHECK-NEXT: b.ne .LBB61_5
; CHECK-NEXT: // %bb.2: // %loadbb2
; CHECK-NEXT: ldr x8, [x0, #16]
; CHECK-NEXT: ldr x9, [x1, #16]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB59_5
+; CHECK-NEXT: b.ne .LBB61_5
; CHECK-NEXT: // %bb.3: // %loadbb3
; CHECK-NEXT: ldr x8, [x0, #24]
; CHECK-NEXT: ldr x9, [x1, #24]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB59_5
+; CHECK-NEXT: b.ne .LBB61_5
; CHECK-NEXT: // %bb.4:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB59_5: // %res_block
+; CHECK-NEXT: .LBB61_5: // %res_block
; CHECK-NEXT: cmp x8, x9
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: cneg w0, w8, hs
@@ -1364,32 +1388,32 @@ define i1 @length32_lt(ptr %x, ptr %y) nounwind {
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB61_5
+; CHECK-NEXT: b.ne .LBB63_5
; CHECK-NEXT: // %bb.1: // %loadbb1
; CHECK-NEXT: ldr x8, [x0, #8]
; CHECK-NEXT: ldr x9, [x1, #8]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB61_5
+; CHECK-NEXT: b.ne .LBB63_5
; CHECK-NEXT: // %bb.2: // %loadbb2
; CHECK-NEXT: ldr x8, [x0, #16]
; CHECK-NEXT: ldr x9, [x1, #16]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB61_5
+; CHECK-NEXT: b.ne .LBB63_5
; CHECK-NEXT: // %bb.3: // %loadbb3
; CHECK-NEXT: ldr x8, [x0, #24]
; CHECK-NEXT: ldr x9, [x1, #24]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB61_5
+; CHECK-NEXT: b.ne .LBB63_5
; CHECK-NEXT: // %bb.4:
; CHECK-NEXT: lsr w0, wzr, #31
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB61_5: // %res_block
+; CHECK-NEXT: .LBB63_5: // %res_block
; CHECK-NEXT: cmp x8, x9
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: cneg w8, w8, hs
@@ -1408,36 +1432,36 @@ define i1 @length32_gt(ptr %x, ptr %y) nounwind {
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB62_5
+; CHECK-NEXT: b.ne .LBB64_5
; CHECK-NEXT: // %bb.1: // %loadbb1
; CHECK-NEXT: ldr x8, [x0, #8]
; CHECK-NEXT: ldr x9, [x1, #8]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB62_5
+; CHECK-NEXT: b.ne .LBB64_5
; CHECK-NEXT: // %bb.2: // %loadbb2
; CHECK-NEXT: ldr x8, [x0, #16]
; CHECK-NEXT: ldr x9, [x1, #16]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB62_5
+; CHECK-NEXT: b.ne .LBB64_5
; CHECK-NEXT: // %bb.3: // %loadbb3
; CHECK-NEXT: ldr x8, [x0, #24]
; CHECK-NEXT: ldr x9, [x1, #24]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB62_5
+; CHECK-NEXT: b.ne .LBB64_5
; CHECK-NEXT: // %bb.4:
; CHECK-NEXT: mov w8, wzr
-; CHECK-NEXT: b .LBB62_6
-; CHECK-NEXT: .LBB62_5: // %res_block
+; CHECK-NEXT: b .LBB64_6
+; CHECK-NEXT: .LBB64_5: // %res_block
; CHECK-NEXT: cmp x8, x9
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: cneg w8, w8, hs
-; CHECK-NEXT: .LBB62_6: // %endblock
+; CHECK-NEXT: .LBB64_6: // %endblock
; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cset w0, gt
; CHECK-NEXT: ret
@@ -1504,46 +1528,46 @@ define i32 @length48(ptr %X, ptr %Y) nounwind {
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB65_7
+; CHECK-NEXT: b.ne .LBB67_7
; CHECK-NEXT: // %bb.1: // %loadbb1
; CHECK-NEXT: ldr x8, [x0, #8]
; CHECK-NEXT: ldr x9, [x1, #8]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB65_7
+; CHECK-NEXT: b.ne .LBB67_7
; CHECK-NEXT: // %bb.2: // %loadbb2
; CHECK-NEXT: ldr x8, [x0, #16]
; CHECK-NEXT: ldr x9, [x1, #16]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB65_7
+; CHECK-NEXT: b.ne .LBB67_7
; CHECK-NEXT: // %bb.3: // %loadbb3
; CHECK-NEXT: ldr x8, [x0, #24]
; CHECK-NEXT: ldr x9, [x1, #24]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB65_7
+; CHECK-NEXT: b.ne .LBB67_7
; CHECK-NEXT: // %bb.4: // %loadbb4
; CHECK-NEXT: ldr x8, [x0, #32]
; CHECK-NEXT: ldr x9, [x1, #32]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB65_7
+; CHECK-NEXT: b.ne .LBB67_7
; CHECK-NEXT: // %bb.5: // %loadbb5
; CHECK-NEXT: ldr x8, [x0, #40]
; CHECK-NEXT: ldr x9, [x1, #40]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB65_7
+; CHECK-NEXT: b.ne .LBB67_7
; CHECK-NEXT: // %bb.6:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB65_7: // %res_block
+; CHECK-NEXT: .LBB67_7: // %res_block
; CHECK-NEXT: cmp x8, x9
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: cneg w0, w8, hs
@@ -1582,46 +1606,46 @@ define i1 @length48_lt(ptr %x, ptr %y) nounwind {
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB67_7
+; CHECK-NEXT: b.ne .LBB69_7
; CHECK-NEXT: // %bb.1: // %loadbb1
; CHECK-NEXT: ldr x8, [x0, #8]
; CHECK-NEXT: ldr x9, [x1, #8]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB67_7
+; CHECK-NEXT: b.ne .LBB69_7
; CHECK-NEXT: // %bb.2: // %loadbb2
; CHECK-NEXT: ldr x8, [x0, #16]
; CHECK-NEXT: ldr x9, [x1, #16]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB67_7
+; CHECK-NEXT: b.ne .LBB69_7
; CHECK-NEXT: // %bb.3: // %loadbb3
; CHECK-NEXT: ldr x8, [x0, #24]
; CHECK-NEXT: ldr x9, [x1, #24]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB67_7
+; CHECK-NEXT: b.ne .LBB69_7
; CHECK-NEXT: // %bb.4: // %loadbb4
; CHECK-NEXT: ldr x8, [x0, #32]
; CHECK-NEXT: ldr x9, [x1, #32]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB67_7
+; CHECK-NEXT: b.ne .LBB69_7
; CHECK-NEXT: // %bb.5: // %loadbb5
; CHECK-NEXT: ldr x8, [x0, #40]
; CHECK-NEXT: ldr x9, [x1, #40]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB67_7
+; CHECK-NEXT: b.ne .LBB69_7
; CHECK-NEXT: // %bb.6:
; CHECK-NEXT: lsr w0, wzr, #31
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB67_7: // %res_block
+; CHECK-NEXT: .LBB69_7: // %res_block
; CHECK-NEXT: cmp x8, x9
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: cneg w8, w8, hs
@@ -1640,50 +1664,50 @@ define i1 @length48_gt(ptr %x, ptr %y) nounwind {
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB68_7
+; CHECK-NEXT: b.ne .LBB70_7
; CHECK-NEXT: // %bb.1: // %loadbb1
; CHECK-NEXT: ldr x8, [x0, #8]
; CHECK-NEXT: ldr x9, [x1, #8]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB68_7
+; CHECK-NEXT: b.ne .LBB70_7
; CHECK-NEXT: // %bb.2: // %loadbb2
; CHECK-NEXT: ldr x8, [x0, #16]
; CHECK-NEXT: ldr x9, [x1, #16]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB68_7
+; CHECK-NEXT: b.ne .LBB70_7
; CHECK-NEXT: // %bb.3: // %loadbb3
; CHECK-NEXT: ldr x8, [x0, #24]
; CHECK-NEXT: ldr x9, [x1, #24]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB68_7
+; CHECK-NEXT: b.ne .LBB70_7
; CHECK-NEXT: // %bb.4: // %loadbb4
; CHECK-NEXT: ldr x8, [x0, #32]
; CHECK-NEXT: ldr x9, [x1, #32]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB68_7
+; CHECK-NEXT: b.ne .LBB70_7
; CHECK-NEXT: // %bb.5: // %loadbb5
; CHECK-NEXT: ldr x8, [x0, #40]
; CHECK-NEXT: ldr x9, [x1, #40]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB68_7
+; CHECK-NEXT: b.ne .LBB70_7
; CHECK-NEXT: // %bb.6:
; CHECK-NEXT: mov w8, wzr
-; CHECK-NEXT: b .LBB68_8
-; CHECK-NEXT: .LBB68_7: // %res_block
+; CHECK-NEXT: b .LBB70_8
+; CHECK-NEXT: .LBB70_7: // %res_block
; CHECK-NEXT: cmp x8, x9
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: cneg w8, w8, hs
-; CHECK-NEXT: .LBB68_8: // %endblock
+; CHECK-NEXT: .LBB70_8: // %endblock
; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cset w0, gt
; CHECK-NEXT: ret
@@ -1761,60 +1785,60 @@ define i32 @length63(ptr %X, ptr %Y) nounwind {
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB71_9
+; CHECK-NEXT: b.ne .LBB73_9
; CHECK-NEXT: // %bb.1: // %loadbb1
; CHECK-NEXT: ldr x8, [x0, #8]
; CHECK-NEXT: ldr x9, [x1, #8]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB71_9
+; CHECK-NEXT: b.ne .LBB73_9
; CHECK-NEXT: // %bb.2: // %loadbb2
; CHECK-NEXT: ldr x8, [x0, #16]
; CHECK-NEXT: ldr x9, [x1, #16]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB71_9
+; CHECK-NEXT: b.ne .LBB73_9
; CHECK-NEXT: // %bb.3: // %loadbb3
; CHECK-NEXT: ldr x8, [x0, #24]
; CHECK-NEXT: ldr x9, [x1, #24]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB71_9
+; CHECK-NEXT: b.ne .LBB73_9
; CHECK-NEXT: // %bb.4: // %loadbb4
; CHECK-NEXT: ldr x8, [x0, #32]
; CHECK-NEXT: ldr x9, [x1, #32]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB71_9
+; CHECK-NEXT: b.ne .LBB73_9
; CHECK-NEXT: // %bb.5: // %loadbb5
; CHECK-NEXT: ldr x8, [x0, #40]
; CHECK-NEXT: ldr x9, [x1, #40]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB71_9
+; CHECK-NEXT: b.ne .LBB73_9
; CHECK-NEXT: // %bb.6: // %loadbb6
; CHECK-NEXT: ldr x8, [x0, #48]
; CHECK-NEXT: ldr x9, [x1, #48]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB71_9
+; CHECK-NEXT: b.ne .LBB73_9
; CHECK-NEXT: // %bb.7: // %loadbb7
; CHECK-NEXT: ldur x8, [x0, #55]
; CHECK-NEXT: ldur x9, [x1, #55]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB71_9
+; CHECK-NEXT: b.ne .LBB73_9
; CHECK-NEXT: // %bb.8:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB71_9: // %res_block
+; CHECK-NEXT: .LBB73_9: // %res_block
; CHECK-NEXT: cmp x8, x9
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: cneg w0, w8, hs
@@ -1859,60 +1883,60 @@ define i1 @length63_lt(ptr %x, ptr %y) nounwind {
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB73_9
+; CHECK-NEXT: b.ne .LBB75_9
; CHECK-NEXT: // %bb.1: // %loadbb1
; CHECK-NEXT: ldr x8, [x0, #8]
; CHECK-NEXT: ldr x9, [x1, #8]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB73_9
+; CHECK-NEXT: b.ne .LBB75_9
; CHECK-NEXT: // %bb.2: // %loadbb2
; CHECK-NEXT: ldr x8, [x0, #16]
; CHECK-NEXT: ldr x9, [x1, #16]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB73_9
+; CHECK-NEXT: b.ne .LBB75_9
; CHECK-NEXT: // %bb.3: // %loadbb3
; CHECK-NEXT: ldr x8, [x0, #24]
; CHECK-NEXT: ldr x9, [x1, #24]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB73_9
+; CHECK-NEXT: b.ne .LBB75_9
; CHECK-NEXT: // %bb.4: // %loadbb4
; CHECK-NEXT: ldr x8, [x0, #32]
; CHECK-NEXT: ldr x9, [x1, #32]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB73_9
+; CHECK-NEXT: b.ne .LBB75_9
; CHECK-NEXT: // %bb.5: // %loadbb5
; CHECK-NEXT: ldr x8, [x0, #40]
; CHECK-NEXT: ldr x9, [x1, #40]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB73_9
+; CHECK-NEXT: b.ne .LBB75_9
; CHECK-NEXT: // %bb.6: // %loadbb6
; CHECK-NEXT: ldr x8, [x0, #48]
; CHECK-NEXT: ldr x9, [x1, #48]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB73_9
+; CHECK-NEXT: b.ne .LBB75_9
; CHECK-NEXT: // %bb.7: // %loadbb7
; CHECK-NEXT: ldur x8, [x0, #55]
; CHECK-NEXT: ldur x9, [x1, #55]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB73_9
+; CHECK-NEXT: b.ne .LBB75_9
; CHECK-NEXT: // %bb.8:
; CHECK-NEXT: lsr w0, wzr, #31
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB73_9: // %res_block
+; CHECK-NEXT: .LBB75_9: // %res_block
; CHECK-NEXT: cmp x8, x9
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: cneg w8, w8, hs
@@ -1931,64 +1955,64 @@ define i1 @length63_gt(ptr %x, ptr %y) nounwind {
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB74_9
+; CHECK-NEXT: b.ne .LBB76_9
; CHECK-NEXT: // %bb.1: // %loadbb1
; CHECK-NEXT: ldr x8, [x0, #8]
; CHECK-NEXT: ldr x9, [x1, #8]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB74_9
+; CHECK-NEXT: b.ne .LBB76_9
; CHECK-NEXT: // %bb.2: // %loadbb2
; CHECK-NEXT: ldr x8, [x0, #16]
; CHECK-NEXT: ldr x9, [x1, #16]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB74_9
+; CHECK-NEXT: b.ne .LBB76_9
; CHECK-NEXT: // %bb.3: // %loadbb3
; CHECK-NEXT: ldr x8, [x0, #24]
; CHECK-NEXT: ldr x9, [x1, #24]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB74_9
+; CHECK-NEXT: b.ne .LBB76_9
; CHECK-NEXT: // %bb.4: // %loadbb4
; CHECK-NEXT: ldr x8, [x0, #32]
; CHECK-NEXT: ldr x9, [x1, #32]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB74_9
+; CHECK-NEXT: b.ne .LBB76_9
; CHECK-NEXT: // %bb.5: // %loadbb5
; CHECK-NEXT: ldr x8, [x0, #40]
; CHECK-NEXT: ldr x9, [x1, #40]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB74_9
+; CHECK-NEXT: b.ne .LBB76_9
; CHECK-NEXT: // %bb.6: // %loadbb6
; CHECK-NEXT: ldr x8, [x0, #48]
; CHECK-NEXT: ldr x9, [x1, #48]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB74_9
+; CHECK-NEXT: b.ne .LBB76_9
; CHECK-NEXT: // %bb.7: // %loadbb7
; CHECK-NEXT: ldur x8, [x0, #55]
; CHECK-NEXT: ldur x9, [x1, #55]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB74_9
+; CHECK-NEXT: b.ne .LBB76_9
; CHECK-NEXT: // %bb.8:
; CHECK-NEXT: mov w8, wzr
-; CHECK-NEXT: b .LBB74_10
-; CHECK-NEXT: .LBB74_9: // %res_block
+; CHECK-NEXT: b .LBB76_10
+; CHECK-NEXT: .LBB76_9: // %res_block
; CHECK-NEXT: cmp x8, x9
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: cneg w8, w8, hs
-; CHECK-NEXT: .LBB74_10: // %endblock
+; CHECK-NEXT: .LBB76_10: // %endblock
; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cset w0, gt
; CHECK-NEXT: ret
@@ -2052,60 +2076,60 @@ define i32 @length64(ptr %X, ptr %Y) nounwind {
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB76_9
+; CHECK-NEXT: b.ne .LBB78_9
; CHECK-NEXT: // %bb.1: // %loadbb1
; CHECK-NEXT: ldr x8, [x0, #8]
; CHECK-NEXT: ldr x9, [x1, #8]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB76_9
+; CHECK-NEXT: b.ne .LBB78_9
; CHECK-NEXT: // %bb.2: // %loadbb2
; CHECK-NEXT: ldr x8, [x0, #16]
; CHECK-NEXT: ldr x9, [x1, #16]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB76_9
+; CHECK-NEXT: b.ne .LBB78_9
; CHECK-NEXT: // %bb.3: // %loadbb3
; CHECK-NEXT: ldr x8, [x0, #24]
; CHECK-NEXT: ldr x9, [x1, #24]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB76_9
+; CHECK-NEXT: b.ne .LBB78_9
; CHECK-NEXT: // %bb.4: // %loadbb4
; CHECK-NEXT: ldr x8, [x0, #32]
; CHECK-NEXT: ldr x9, [x1, #32]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB76_9
+; CHECK-NEXT: b.ne .LBB78_9
; CHECK-NEXT: // %bb.5: // %loadbb5
; CHECK-NEXT: ldr x8, [x0, #40]
; CHECK-NEXT: ldr x9, [x1, #40]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB76_9
+; CHECK-NEXT: b.ne .LBB78_9
; CHECK-NEXT: // %bb.6: // %loadbb6
; CHECK-NEXT: ldr x8, [x0, #48]
; CHECK-NEXT: ldr x9, [x1, #48]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB76_9
+; CHECK-NEXT: b.ne .LBB78_9
; CHECK-NEXT: // %bb.7: // %loadbb7
; CHECK-NEXT: ldr x8, [x0, #56]
; CHECK-NEXT: ldr x9, [x1, #56]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB76_9
+; CHECK-NEXT: b.ne .LBB78_9
; CHECK-NEXT: // %bb.8:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB76_9: // %res_block
+; CHECK-NEXT: .LBB78_9: // %res_block
; CHECK-NEXT: cmp x8, x9
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: cneg w0, w8, hs
@@ -2148,60 +2172,60 @@ define i1 @length64_lt(ptr %x, ptr %y) nounwind {
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB78_9
+; CHECK-NEXT: b.ne .LBB80_9
; CHECK-NEXT: // %bb.1: // %loadbb1
; CHECK-NEXT: ldr x8, [x0, #8]
; CHECK-NEXT: ldr x9, [x1, #8]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB78_9
+; CHECK-NEXT: b.ne .LBB80_9
; CHECK-NEXT: // %bb.2: // %loadbb2
; CHECK-NEXT: ldr x8, [x0, #16]
; CHECK-NEXT: ldr x9, [x1, #16]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB78_9
+; CHECK-NEXT: b.ne .LBB80_9
; CHECK-NEXT: // %bb.3: // %loadbb3
; CHECK-NEXT: ldr x8, [x0, #24]
; CHECK-NEXT: ldr x9, [x1, #24]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB78_9
+; CHECK-NEXT: b.ne .LBB80_9
; CHECK-NEXT: // %bb.4: // %loadbb4
; CHECK-NEXT: ldr x8, [x0, #32]
; CHECK-NEXT: ldr x9, [x1, #32]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB78_9
+; CHECK-NEXT: b.ne .LBB80_9
; CHECK-NEXT: // %bb.5: // %loadbb5
; CHECK-NEXT: ldr x8, [x0, #40]
; CHECK-NEXT: ldr x9, [x1, #40]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB78_9
+; CHECK-NEXT: b.ne .LBB80_9
; CHECK-NEXT: // %bb.6: // %loadbb6
; CHECK-NEXT: ldr x8, [x0, #48]
; CHECK-NEXT: ldr x9, [x1, #48]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB78_9
+; CHECK-NEXT: b.ne .LBB80_9
; CHECK-NEXT: // %bb.7: // %loadbb7
; CHECK-NEXT: ldr x8, [x0, #56]
; CHECK-NEXT: ldr x9, [x1, #56]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB78_9
+; CHECK-NEXT: b.ne .LBB80_9
; CHECK-NEXT: // %bb.8:
; CHECK-NEXT: lsr w0, wzr, #31
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB78_9: // %res_block
+; CHECK-NEXT: .LBB80_9: // %res_block
; CHECK-NEXT: cmp x8, x9
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: cneg w8, w8, hs
@@ -2220,64 +2244,64 @@ define i1 @length64_gt(ptr %x, ptr %y) nounwind {
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB79_9
+; CHECK-NEXT: b.ne .LBB81_9
; CHECK-NEXT: // %bb.1: // %loadbb1
; CHECK-NEXT: ldr x8, [x0, #8]
; CHECK-NEXT: ldr x9, [x1, #8]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB79_9
+; CHECK-NEXT: b.ne .LBB81_9
; CHECK-NEXT: // %bb.2: // %loadbb2
; CHECK-NEXT: ldr x8, [x0, #16]
; CHECK-NEXT: ldr x9, [x1, #16]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB79_9
+; CHECK-NEXT: b.ne .LBB81_9
; CHECK-NEXT: // %bb.3: // %loadbb3
; CHECK-NEXT: ldr x8, [x0, #24]
; CHECK-NEXT: ldr x9, [x1, #24]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB79_9
+; CHECK-NEXT: b.ne .LBB81_9
; CHECK-NEXT: // %bb.4: // %loadbb4
; CHECK-NEXT: ldr x8, [x0, #32]
; CHECK-NEXT: ldr x9, [x1, #32]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB79_9
+; CHECK-NEXT: b.ne .LBB81_9
; CHECK-NEXT: // %bb.5: // %loadbb5
; CHECK-NEXT: ldr x8, [x0, #40]
; CHECK-NEXT: ldr x9, [x1, #40]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB79_9
+; CHECK-NEXT: b.ne .LBB81_9
; CHECK-NEXT: // %bb.6: // %loadbb6
; CHECK-NEXT: ldr x8, [x0, #48]
; CHECK-NEXT: ldr x9, [x1, #48]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB79_9
+; CHECK-NEXT: b.ne .LBB81_9
; CHECK-NEXT: // %bb.7: // %loadbb7
; CHECK-NEXT: ldr x8, [x0, #56]
; CHECK-NEXT: ldr x9, [x1, #56]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB79_9
+; CHECK-NEXT: b.ne .LBB81_9
; CHECK-NEXT: // %bb.8:
; CHECK-NEXT: mov w8, wzr
-; CHECK-NEXT: b .LBB79_10
-; CHECK-NEXT: .LBB79_9: // %res_block
+; CHECK-NEXT: b .LBB81_10
+; CHECK-NEXT: .LBB81_9: // %res_block
; CHECK-NEXT: cmp x8, x9
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: cneg w8, w8, hs
-; CHECK-NEXT: .LBB79_10: // %endblock
+; CHECK-NEXT: .LBB81_10: // %endblock
; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cset w0, gt
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll b/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll
index c0f8f86e6e8b107..0253d131226083a 100644
--- a/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll
+++ b/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll
@@ -226,15 +226,11 @@ define i1 @length4_lt(ptr %X, ptr %Y) nounwind {
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl (%ecx), %ecx
-; X86-NEXT: movl (%eax), %edx
+; X86-NEXT: movl (%eax), %eax
; X86-NEXT: bswapl %ecx
-; X86-NEXT: bswapl %edx
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpl %edx, %ecx
-; X86-NEXT: seta %al
-; X86-NEXT: sbbl $0, %eax
-; X86-NEXT: shrl $31, %eax
-; X86-NEXT: # kill: def $al killed $al killed $eax
+; X86-NEXT: bswapl %eax
+; X86-NEXT: cmpl %eax, %ecx
+; X86-NEXT: setb %al
; X86-NEXT: retl
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind
%c = icmp slt i32 %m, 0
@@ -250,12 +246,8 @@ define i1 @length4_gt(ptr %X, ptr %Y) nounwind {
; X86-NEXT: movl (%eax), %eax
; X86-NEXT: bswapl %ecx
; X86-NEXT: bswapl %eax
-; X86-NEXT: xorl %edx, %edx
; X86-NEXT: cmpl %eax, %ecx
-; X86-NEXT: seta %dl
-; X86-NEXT: sbbl $0, %edx
-; X86-NEXT: testl %edx, %edx
-; X86-NEXT: setg %al
+; X86-NEXT: seta %al
; X86-NEXT: retl
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind
%c = icmp sgt i32 %m, 0
diff --git a/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll b/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll
index 56d06021867fa15..6eb02bfc1fd0c39 100644
--- a/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll
+++ b/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll
@@ -208,16 +208,12 @@ define i1 @length4_eq(ptr %X, ptr %Y) nounwind {
define i1 @length4_lt(ptr %X, ptr %Y) nounwind {
; X64-LABEL: length4_lt:
; X64: # %bb.0:
-; X64-NEXT: movl (%rdi), %ecx
-; X64-NEXT: movl (%rsi), %edx
+; X64-NEXT: movl (%rdi), %eax
+; X64-NEXT: movl (%rsi), %ecx
+; X64-NEXT: bswapl %eax
; X64-NEXT: bswapl %ecx
-; X64-NEXT: bswapl %edx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpl %edx, %ecx
-; X64-NEXT: seta %al
-; X64-NEXT: sbbl $0, %eax
-; X64-NEXT: shrl $31, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: cmpl %ecx, %eax
+; X64-NEXT: setb %al
; X64-NEXT: retq
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
%c = icmp slt i32 %m, 0
@@ -231,12 +227,8 @@ define i1 @length4_gt(ptr %X, ptr %Y) nounwind {
; X64-NEXT: movl (%rsi), %ecx
; X64-NEXT: bswapl %eax
; X64-NEXT: bswapl %ecx
-; X64-NEXT: xorl %edx, %edx
; X64-NEXT: cmpl %ecx, %eax
-; X64-NEXT: seta %dl
-; X64-NEXT: sbbl $0, %edx
-; X64-NEXT: testl %edx, %edx
-; X64-NEXT: setg %al
+; X64-NEXT: seta %al
; X64-NEXT: retq
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
%c = icmp sgt i32 %m, 0
diff --git a/llvm/test/CodeGen/X86/memcmp-x32.ll b/llvm/test/CodeGen/X86/memcmp-x32.ll
index ab439b32f2f1b20..a63402cea209627 100644
--- a/llvm/test/CodeGen/X86/memcmp-x32.ll
+++ b/llvm/test/CodeGen/X86/memcmp-x32.ll
@@ -254,15 +254,11 @@ define i1 @length4_lt(ptr %X, ptr %Y) nounwind {
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl (%ecx), %ecx
-; X86-NEXT: movl (%eax), %edx
+; X86-NEXT: movl (%eax), %eax
; X86-NEXT: bswapl %ecx
-; X86-NEXT: bswapl %edx
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpl %edx, %ecx
-; X86-NEXT: seta %al
-; X86-NEXT: sbbl $0, %eax
-; X86-NEXT: shrl $31, %eax
-; X86-NEXT: # kill: def $al killed $al killed $eax
+; X86-NEXT: bswapl %eax
+; X86-NEXT: cmpl %eax, %ecx
+; X86-NEXT: setb %al
; X86-NEXT: retl
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind
%c = icmp slt i32 %m, 0
@@ -278,12 +274,8 @@ define i1 @length4_gt(ptr %X, ptr %Y) nounwind {
; X86-NEXT: movl (%eax), %eax
; X86-NEXT: bswapl %ecx
; X86-NEXT: bswapl %eax
-; X86-NEXT: xorl %edx, %edx
; X86-NEXT: cmpl %eax, %ecx
-; X86-NEXT: seta %dl
-; X86-NEXT: sbbl $0, %edx
-; X86-NEXT: testl %edx, %edx
-; X86-NEXT: setg %al
+; X86-NEXT: seta %al
; X86-NEXT: retl
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind
%c = icmp sgt i32 %m, 0
diff --git a/llvm/test/CodeGen/X86/memcmp.ll b/llvm/test/CodeGen/X86/memcmp.ll
index 1330f3a241a5c2a..f5e7384362a92b1 100644
--- a/llvm/test/CodeGen/X86/memcmp.ll
+++ b/llvm/test/CodeGen/X86/memcmp.ll
@@ -234,16 +234,12 @@ define i1 @length4_eq(ptr %X, ptr %Y) nounwind {
define i1 @length4_lt(ptr %X, ptr %Y) nounwind {
; X64-LABEL: length4_lt:
; X64: # %bb.0:
-; X64-NEXT: movl (%rdi), %ecx
-; X64-NEXT: movl (%rsi), %edx
+; X64-NEXT: movl (%rdi), %eax
+; X64-NEXT: movl (%rsi), %ecx
+; X64-NEXT: bswapl %eax
; X64-NEXT: bswapl %ecx
-; X64-NEXT: bswapl %edx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpl %edx, %ecx
-; X64-NEXT: seta %al
-; X64-NEXT: sbbl $0, %eax
-; X64-NEXT: shrl $31, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: cmpl %ecx, %eax
+; X64-NEXT: setb %al
; X64-NEXT: retq
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
%c = icmp slt i32 %m, 0
@@ -257,12 +253,8 @@ define i1 @length4_gt(ptr %X, ptr %Y) nounwind {
; X64-NEXT: movl (%rsi), %ecx
; X64-NEXT: bswapl %eax
; X64-NEXT: bswapl %ecx
-; X64-NEXT: xorl %edx, %edx
; X64-NEXT: cmpl %ecx, %eax
-; X64-NEXT: seta %dl
-; X64-NEXT: sbbl $0, %edx
-; X64-NEXT: testl %edx, %edx
-; X64-NEXT: setg %al
+; X64-NEXT: seta %al
; X64-NEXT: retq
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
%c = icmp sgt i32 %m, 0
>From ee911ab53736b88a54ba9ac84d50a8ba37e2f238 Mon Sep 17 00:00:00 2001
From: Igor Kirillov <igor.kirillov at arm.com>
Date: Fri, 3 Nov 2023 19:23:21 +0000
Subject: [PATCH 2/3] Apply clang-format
---
llvm/lib/CodeGen/ExpandMemCmp.cpp | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/CodeGen/ExpandMemCmp.cpp b/llvm/lib/CodeGen/ExpandMemCmp.cpp
index a3dd0feea2ff969..c3d73df58e46358 100644
--- a/llvm/lib/CodeGen/ExpandMemCmp.cpp
+++ b/llvm/lib/CodeGen/ExpandMemCmp.cpp
@@ -667,11 +667,13 @@ Value *MemCmpExpansion::getMemCmpOneBlock() {
ICmpInst::Predicate Pred = ICmpInst::Predicate::BAD_ICMP_PREDICATE;
uint64_t Shift;
bool NeedsZExt = false;
- // This is a special case because instead of checking if the result is less than zero:
+ // This is a special case because instead of checking if the result is less
+ // than zero:
// bool result = memcmp(a, b, NBYTES) < 0;
// Compiler is clever enough to generate the following code:
// bool result = memcmp(a, b, NBYTES) >> 31;
- if (match(UI, m_LShr(m_Value(), m_ConstantInt(Shift))) && Shift == CI->getType()->getIntegerBitWidth() - 1) {
+ if (match(UI, m_LShr(m_Value(), m_ConstantInt(Shift))) &&
+ Shift == CI->getType()->getIntegerBitWidth() - 1) {
Pred = ICmpInst::ICMP_SLT;
NeedsZExt = true;
} else {
@@ -680,7 +682,8 @@ Value *MemCmpExpansion::getMemCmpOneBlock() {
}
// Generate new code and remove the original memcmp call and the user
if (ICmpInst::isSigned(Pred)) {
- Value *Cmp = Builder.CreateICmp(CmpInst::getUnsignedPredicate(Pred), Loads.Lhs, Loads.Rhs);
+ Value *Cmp = Builder.CreateICmp(CmpInst::getUnsignedPredicate(Pred),
+ Loads.Lhs, Loads.Rhs);
auto *Result = NeedsZExt ? Builder.CreateZExt(Cmp, U->getType()) : Cmp;
UI->replaceAllUsesWith(Result);
UI->eraseFromParent();
>From 6974602b0d6b391f6b61eb5e3f637e549e98c223 Mon Sep 17 00:00:00 2001
From: Igor Kirillov <igor.kirillov at arm.com>
Date: Mon, 6 Nov 2023 12:46:39 +0000
Subject: [PATCH 3/3] Address the comments
---
llvm/lib/CodeGen/ExpandMemCmp.cpp | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/CodeGen/ExpandMemCmp.cpp b/llvm/lib/CodeGen/ExpandMemCmp.cpp
index c3d73df58e46358..c98790cfeb58623 100644
--- a/llvm/lib/CodeGen/ExpandMemCmp.cpp
+++ b/llvm/lib/CodeGen/ExpandMemCmp.cpp
@@ -638,6 +638,9 @@ Value *MemCmpExpansion::getMemCmpEqZeroOneBlock() {
/// A memcmp expansion that only has one block of load and compare can bypass
/// the compare, branch, and phi IR that is required in the general case.
+/// This function also analyses users of memcmp, and if there is only one user
+/// from which we can conclude that only 2 out of 3 memcmp outcomes really
+/// matter, then it generates more efficient code with only one comparison.
Value *MemCmpExpansion::getMemCmpOneBlock() {
bool NeedsBSwap = DL.isLittleEndian() && Size != 1;
Type *LoadSizeType = IntegerType::get(CI->getContext(), Size * 8);
@@ -662,8 +665,8 @@ Value *MemCmpExpansion::getMemCmpOneBlock() {
// If a user of memcmp cares only about two outcomes, for example:
// bool result = memcmp(a, b, NBYTES) > 0;
// We can generate more optimal code with a smaller number of operations
- if (auto *U = CI->getUniqueUndroppableUser()) {
- auto *UI = cast<Instruction>(U);
+ if (CI->hasOneUser()) {
+ auto *UI = cast<Instruction>(*CI->user_begin());
ICmpInst::Predicate Pred = ICmpInst::Predicate::BAD_ICMP_PREDICATE;
uint64_t Shift;
bool NeedsZExt = false;
@@ -673,7 +676,7 @@ Value *MemCmpExpansion::getMemCmpOneBlock() {
// Compiler is clever enough to generate the following code:
// bool result = memcmp(a, b, NBYTES) >> 31;
if (match(UI, m_LShr(m_Value(), m_ConstantInt(Shift))) &&
- Shift == CI->getType()->getIntegerBitWidth() - 1) {
+ Shift == (CI->getType()->getIntegerBitWidth() - 1)) {
Pred = ICmpInst::ICMP_SLT;
NeedsZExt = true;
} else {
More information about the llvm-commits mailing list