[llvm-branch-commits] [llvm] [RISCV] Add initial support of memcmp expansion (PR #107548)

Pengcheng Wang via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Thu Sep 12 22:13:21 PDT 2024


https://github.com/wangpc-pp updated https://github.com/llvm/llvm-project/pull/107548

>From f21cfcfc90330ee3856746b6315a81a00313b0e0 Mon Sep 17 00:00:00 2001
From: Wang Pengcheng <wangpengcheng.pp at bytedance.com>
Date: Fri, 6 Sep 2024 17:20:51 +0800
Subject: [PATCH 1/5] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20in?=
 =?UTF-8?q?itial=20version?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.6-beta.1
---
 .../Target/RISCV/RISCVTargetTransformInfo.cpp |  15 +
 .../Target/RISCV/RISCVTargetTransformInfo.h   |   3 +
 llvm/test/CodeGen/RISCV/memcmp.ll             | 932 ++++++++++++++++++
 3 files changed, 950 insertions(+)
 create mode 100644 llvm/test/CodeGen/RISCV/memcmp.ll

diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index e809e15eacf696..ad532aadc83266 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -2113,3 +2113,18 @@ bool RISCVTTIImpl::shouldConsiderAddressTypePromotion(
   }
   return Considerable;
 }
+
+RISCVTTIImpl::TTI::MemCmpExpansionOptions
+RISCVTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
+  TTI::MemCmpExpansionOptions Options;
+  // FIXME: Vector loads haven't been tested; only scalar sizes are used.
+  // Overlapping loads are allowed only with unaligned scalar memory access.
+  Options.AllowOverlappingLoads = ST->enableUnalignedScalarMem();
+  Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
+  Options.NumLoadsPerBlock = Options.MaxNumLoads;
+  if (ST->is64Bit())
+    Options.LoadSizes.push_back(8); // Only when the subtarget is 64-bit.
+  llvm::append_range(Options.LoadSizes, ArrayRef({4, 2, 1}));
+  Options.AllowedTailExpansions = {3, 5, 6};
+  return Options;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 763b89bfec0a66..ee9bed09df97f3 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -404,6 +404,9 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
   shouldConsiderAddressTypePromotion(const Instruction &I,
                                      bool &AllowPromotionWithoutCommonHeader);
   std::optional<unsigned> getMinPageSize() const { return 4096; }
+
+  TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
+                                                    bool IsZeroCmp) const;
 };
 
 } // end namespace llvm
diff --git a/llvm/test/CodeGen/RISCV/memcmp.ll b/llvm/test/CodeGen/RISCV/memcmp.ll
new file mode 100644
index 00000000000000..652cd02e2c750a
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/memcmp.ll
@@ -0,0 +1,932 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -O2 | FileCheck %s --check-prefix=CHECK-ALIGNED-RV32
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -O2 | FileCheck %s --check-prefix=CHECK-ALIGNED-RV64
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+unaligned-scalar-mem -O2 \
+; RUN:   | FileCheck %s --check-prefix=CHECK-UNALIGNED-RV32
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+unaligned-scalar-mem -O2 \
+; RUN:   | FileCheck %s --check-prefix=CHECK-UNALIGNED-RV64
+
+declare i32 @bcmp(i8*, i8*, iXLen) nounwind readonly
+declare i32 @memcmp(i8*, i8*, iXLen) nounwind readonly
+
+define i1 @bcmp_size_15(i8* %s1, i8* %s2) {
+; CHECK-ALIGNED-RV32-LABEL: bcmp_size_15:
+; CHECK-ALIGNED-RV32:       # %bb.0: # %entry
+; CHECK-ALIGNED-RV32-NEXT:    lbu a2, 1(a0)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a3, 0(a0)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a4, 2(a0)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a5, 3(a0)
+; CHECK-ALIGNED-RV32-NEXT:    slli a2, a2, 8
+; CHECK-ALIGNED-RV32-NEXT:    or a2, a2, a3
+; CHECK-ALIGNED-RV32-NEXT:    slli a4, a4, 16
+; CHECK-ALIGNED-RV32-NEXT:    slli a5, a5, 24
+; CHECK-ALIGNED-RV32-NEXT:    or a4, a5, a4
+; CHECK-ALIGNED-RV32-NEXT:    or a2, a4, a2
+; CHECK-ALIGNED-RV32-NEXT:    lbu a3, 1(a1)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a4, 0(a1)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a5, 2(a1)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a6, 3(a1)
+; CHECK-ALIGNED-RV32-NEXT:    slli a3, a3, 8
+; CHECK-ALIGNED-RV32-NEXT:    or a3, a3, a4
+; CHECK-ALIGNED-RV32-NEXT:    slli a5, a5, 16
+; CHECK-ALIGNED-RV32-NEXT:    slli a6, a6, 24
+; CHECK-ALIGNED-RV32-NEXT:    or a4, a6, a5
+; CHECK-ALIGNED-RV32-NEXT:    or a3, a4, a3
+; CHECK-ALIGNED-RV32-NEXT:    xor a2, a2, a3
+; CHECK-ALIGNED-RV32-NEXT:    lbu a3, 5(a0)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a4, 4(a0)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a5, 6(a0)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a6, 7(a0)
+; CHECK-ALIGNED-RV32-NEXT:    slli a3, a3, 8
+; CHECK-ALIGNED-RV32-NEXT:    or a3, a3, a4
+; CHECK-ALIGNED-RV32-NEXT:    slli a5, a5, 16
+; CHECK-ALIGNED-RV32-NEXT:    slli a6, a6, 24
+; CHECK-ALIGNED-RV32-NEXT:    or a4, a6, a5
+; CHECK-ALIGNED-RV32-NEXT:    or a3, a4, a3
+; CHECK-ALIGNED-RV32-NEXT:    lbu a4, 5(a1)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a5, 4(a1)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a6, 6(a1)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a7, 7(a1)
+; CHECK-ALIGNED-RV32-NEXT:    slli a4, a4, 8
+; CHECK-ALIGNED-RV32-NEXT:    or a4, a4, a5
+; CHECK-ALIGNED-RV32-NEXT:    slli a6, a6, 16
+; CHECK-ALIGNED-RV32-NEXT:    slli a7, a7, 24
+; CHECK-ALIGNED-RV32-NEXT:    or a5, a7, a6
+; CHECK-ALIGNED-RV32-NEXT:    or a4, a5, a4
+; CHECK-ALIGNED-RV32-NEXT:    xor a3, a3, a4
+; CHECK-ALIGNED-RV32-NEXT:    lbu a4, 9(a0)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a5, 8(a0)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a6, 10(a0)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a7, 11(a0)
+; CHECK-ALIGNED-RV32-NEXT:    slli a4, a4, 8
+; CHECK-ALIGNED-RV32-NEXT:    or a4, a4, a5
+; CHECK-ALIGNED-RV32-NEXT:    slli a6, a6, 16
+; CHECK-ALIGNED-RV32-NEXT:    slli a7, a7, 24
+; CHECK-ALIGNED-RV32-NEXT:    or a5, a7, a6
+; CHECK-ALIGNED-RV32-NEXT:    or a4, a5, a4
+; CHECK-ALIGNED-RV32-NEXT:    lbu a5, 9(a1)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a6, 8(a1)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a7, 10(a1)
+; CHECK-ALIGNED-RV32-NEXT:    lbu t0, 11(a1)
+; CHECK-ALIGNED-RV32-NEXT:    slli a5, a5, 8
+; CHECK-ALIGNED-RV32-NEXT:    or a5, a5, a6
+; CHECK-ALIGNED-RV32-NEXT:    slli a7, a7, 16
+; CHECK-ALIGNED-RV32-NEXT:    slli t0, t0, 24
+; CHECK-ALIGNED-RV32-NEXT:    or a6, t0, a7
+; CHECK-ALIGNED-RV32-NEXT:    lbu a7, 13(a0)
+; CHECK-ALIGNED-RV32-NEXT:    lbu t0, 12(a0)
+; CHECK-ALIGNED-RV32-NEXT:    or a5, a6, a5
+; CHECK-ALIGNED-RV32-NEXT:    xor a4, a4, a5
+; CHECK-ALIGNED-RV32-NEXT:    slli a7, a7, 8
+; CHECK-ALIGNED-RV32-NEXT:    or a5, a7, t0
+; CHECK-ALIGNED-RV32-NEXT:    lbu a6, 13(a1)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a7, 12(a1)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a0, 14(a0)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a1, 14(a1)
+; CHECK-ALIGNED-RV32-NEXT:    slli a6, a6, 8
+; CHECK-ALIGNED-RV32-NEXT:    or a6, a6, a7
+; CHECK-ALIGNED-RV32-NEXT:    xor a5, a5, a6
+; CHECK-ALIGNED-RV32-NEXT:    xor a0, a0, a1
+; CHECK-ALIGNED-RV32-NEXT:    or a2, a2, a3
+; CHECK-ALIGNED-RV32-NEXT:    or a4, a4, a5
+; CHECK-ALIGNED-RV32-NEXT:    or a2, a2, a4
+; CHECK-ALIGNED-RV32-NEXT:    or a0, a2, a0
+; CHECK-ALIGNED-RV32-NEXT:    seqz a0, a0
+; CHECK-ALIGNED-RV32-NEXT:    ret
+;
+; CHECK-ALIGNED-RV64-LABEL: bcmp_size_15:
+; CHECK-ALIGNED-RV64:       # %bb.0: # %entry
+; CHECK-ALIGNED-RV64-NEXT:    lbu a2, 1(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a3, 0(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a4, 2(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a5, 3(a0)
+; CHECK-ALIGNED-RV64-NEXT:    slli a2, a2, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a2, a2, a3
+; CHECK-ALIGNED-RV64-NEXT:    slli a4, a4, 16
+; CHECK-ALIGNED-RV64-NEXT:    slli a5, a5, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a5, a4
+; CHECK-ALIGNED-RV64-NEXT:    or a2, a4, a2
+; CHECK-ALIGNED-RV64-NEXT:    lbu a3, 5(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a4, 4(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a5, 6(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a6, 7(a0)
+; CHECK-ALIGNED-RV64-NEXT:    slli a3, a3, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a3, a3, a4
+; CHECK-ALIGNED-RV64-NEXT:    slli a5, a5, 16
+; CHECK-ALIGNED-RV64-NEXT:    slli a6, a6, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a6, a5
+; CHECK-ALIGNED-RV64-NEXT:    or a3, a4, a3
+; CHECK-ALIGNED-RV64-NEXT:    slli a3, a3, 32
+; CHECK-ALIGNED-RV64-NEXT:    or a2, a3, a2
+; CHECK-ALIGNED-RV64-NEXT:    lbu a3, 1(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a4, 0(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a5, 2(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a6, 3(a1)
+; CHECK-ALIGNED-RV64-NEXT:    slli a3, a3, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a3, a3, a4
+; CHECK-ALIGNED-RV64-NEXT:    slli a5, a5, 16
+; CHECK-ALIGNED-RV64-NEXT:    slli a6, a6, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a6, a5
+; CHECK-ALIGNED-RV64-NEXT:    or a3, a4, a3
+; CHECK-ALIGNED-RV64-NEXT:    lbu a4, 5(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a5, 4(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a6, 6(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a7, 7(a1)
+; CHECK-ALIGNED-RV64-NEXT:    slli a4, a4, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a4, a5
+; CHECK-ALIGNED-RV64-NEXT:    slli a6, a6, 16
+; CHECK-ALIGNED-RV64-NEXT:    slli a7, a7, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a5, a7, a6
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a5, a4
+; CHECK-ALIGNED-RV64-NEXT:    slli a4, a4, 32
+; CHECK-ALIGNED-RV64-NEXT:    or a3, a4, a3
+; CHECK-ALIGNED-RV64-NEXT:    xor a2, a2, a3
+; CHECK-ALIGNED-RV64-NEXT:    lbu a3, 9(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a4, 8(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a5, 10(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a6, 11(a0)
+; CHECK-ALIGNED-RV64-NEXT:    slli a3, a3, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a3, a3, a4
+; CHECK-ALIGNED-RV64-NEXT:    slli a5, a5, 16
+; CHECK-ALIGNED-RV64-NEXT:    slli a6, a6, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a6, a5
+; CHECK-ALIGNED-RV64-NEXT:    or a3, a4, a3
+; CHECK-ALIGNED-RV64-NEXT:    lbu a4, 9(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a5, 8(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a6, 10(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a7, 11(a1)
+; CHECK-ALIGNED-RV64-NEXT:    slli a4, a4, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a4, a5
+; CHECK-ALIGNED-RV64-NEXT:    slli a6, a6, 16
+; CHECK-ALIGNED-RV64-NEXT:    slli a7, a7, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a5, a7, a6
+; CHECK-ALIGNED-RV64-NEXT:    lbu a6, 13(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a7, 12(a0)
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a5, a4
+; CHECK-ALIGNED-RV64-NEXT:    xor a3, a3, a4
+; CHECK-ALIGNED-RV64-NEXT:    slli a6, a6, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a6, a7
+; CHECK-ALIGNED-RV64-NEXT:    lbu a5, 13(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a6, 12(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a0, 14(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a1, 14(a1)
+; CHECK-ALIGNED-RV64-NEXT:    slli a5, a5, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a5, a5, a6
+; CHECK-ALIGNED-RV64-NEXT:    xor a4, a4, a5
+; CHECK-ALIGNED-RV64-NEXT:    xor a0, a0, a1
+; CHECK-ALIGNED-RV64-NEXT:    or a0, a4, a0
+; CHECK-ALIGNED-RV64-NEXT:    or a0, a3, a0
+; CHECK-ALIGNED-RV64-NEXT:    or a0, a2, a0
+; CHECK-ALIGNED-RV64-NEXT:    seqz a0, a0
+; CHECK-ALIGNED-RV64-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV32-LABEL: bcmp_size_15:
+; CHECK-UNALIGNED-RV32:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV32-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV32-NEXT:    lw a4, 4(a0)
+; CHECK-UNALIGNED-RV32-NEXT:    lw a5, 4(a1)
+; CHECK-UNALIGNED-RV32-NEXT:    lw a6, 8(a0)
+; CHECK-UNALIGNED-RV32-NEXT:    lw a7, 8(a1)
+; CHECK-UNALIGNED-RV32-NEXT:    lw a0, 11(a0)
+; CHECK-UNALIGNED-RV32-NEXT:    lw a1, 11(a1)
+; CHECK-UNALIGNED-RV32-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV32-NEXT:    xor a4, a4, a5
+; CHECK-UNALIGNED-RV32-NEXT:    xor a3, a6, a7
+; CHECK-UNALIGNED-RV32-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV32-NEXT:    or a2, a2, a4
+; CHECK-UNALIGNED-RV32-NEXT:    or a0, a3, a0
+; CHECK-UNALIGNED-RV32-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV32-NEXT:    seqz a0, a0
+; CHECK-UNALIGNED-RV32-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-LABEL: bcmp_size_15:
+; CHECK-UNALIGNED-RV64:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-NEXT:    ld a2, 0(a0)
+; CHECK-UNALIGNED-RV64-NEXT:    ld a3, 0(a1)
+; CHECK-UNALIGNED-RV64-NEXT:    ld a0, 7(a0)
+; CHECK-UNALIGNED-RV64-NEXT:    ld a1, 7(a1)
+; CHECK-UNALIGNED-RV64-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-NEXT:    seqz a0, a0
+; CHECK-UNALIGNED-RV64-NEXT:    ret
+entry:
+  %bcmp = call i32 @bcmp(i8* %s1, i8* %s2, iXLen 15)
+  %ret = icmp eq i32 %bcmp, 0
+  ret i1 %ret
+}
+
+define i1 @bcmp_size_31(i8* %s1, i8* %s2) {
+; CHECK-ALIGNED-RV32-LABEL: bcmp_size_31:
+; CHECK-ALIGNED-RV32:       # %bb.0: # %entry
+; CHECK-ALIGNED-RV32-NEXT:    addi sp, sp, -16
+; CHECK-ALIGNED-RV32-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-ALIGNED-RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; CHECK-ALIGNED-RV32-NEXT:    .cfi_offset ra, -4
+; CHECK-ALIGNED-RV32-NEXT:    li a2, 31
+; CHECK-ALIGNED-RV32-NEXT:    call bcmp
+; CHECK-ALIGNED-RV32-NEXT:    seqz a0, a0
+; CHECK-ALIGNED-RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; CHECK-ALIGNED-RV32-NEXT:    addi sp, sp, 16
+; CHECK-ALIGNED-RV32-NEXT:    ret
+;
+; CHECK-ALIGNED-RV64-LABEL: bcmp_size_31:
+; CHECK-ALIGNED-RV64:       # %bb.0: # %entry
+; CHECK-ALIGNED-RV64-NEXT:    lbu a2, 1(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a3, 0(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a4, 2(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a5, 3(a0)
+; CHECK-ALIGNED-RV64-NEXT:    slli a2, a2, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a2, a2, a3
+; CHECK-ALIGNED-RV64-NEXT:    slli a4, a4, 16
+; CHECK-ALIGNED-RV64-NEXT:    slli a5, a5, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a5, a4
+; CHECK-ALIGNED-RV64-NEXT:    or a2, a4, a2
+; CHECK-ALIGNED-RV64-NEXT:    lbu a3, 5(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a4, 4(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a5, 6(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a6, 7(a0)
+; CHECK-ALIGNED-RV64-NEXT:    slli a3, a3, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a3, a3, a4
+; CHECK-ALIGNED-RV64-NEXT:    slli a5, a5, 16
+; CHECK-ALIGNED-RV64-NEXT:    slli a6, a6, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a6, a5
+; CHECK-ALIGNED-RV64-NEXT:    or a3, a4, a3
+; CHECK-ALIGNED-RV64-NEXT:    slli a3, a3, 32
+; CHECK-ALIGNED-RV64-NEXT:    or a2, a3, a2
+; CHECK-ALIGNED-RV64-NEXT:    lbu a3, 1(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a4, 0(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a5, 2(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a6, 3(a1)
+; CHECK-ALIGNED-RV64-NEXT:    slli a3, a3, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a3, a3, a4
+; CHECK-ALIGNED-RV64-NEXT:    slli a5, a5, 16
+; CHECK-ALIGNED-RV64-NEXT:    slli a6, a6, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a6, a5
+; CHECK-ALIGNED-RV64-NEXT:    or a3, a4, a3
+; CHECK-ALIGNED-RV64-NEXT:    lbu a4, 5(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a5, 4(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a6, 6(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a7, 7(a1)
+; CHECK-ALIGNED-RV64-NEXT:    slli a4, a4, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a4, a5
+; CHECK-ALIGNED-RV64-NEXT:    slli a6, a6, 16
+; CHECK-ALIGNED-RV64-NEXT:    slli a7, a7, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a5, a7, a6
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a5, a4
+; CHECK-ALIGNED-RV64-NEXT:    slli a4, a4, 32
+; CHECK-ALIGNED-RV64-NEXT:    or a3, a4, a3
+; CHECK-ALIGNED-RV64-NEXT:    xor a2, a2, a3
+; CHECK-ALIGNED-RV64-NEXT:    lbu a3, 9(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a4, 8(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a5, 10(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a6, 11(a0)
+; CHECK-ALIGNED-RV64-NEXT:    slli a3, a3, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a3, a3, a4
+; CHECK-ALIGNED-RV64-NEXT:    slli a5, a5, 16
+; CHECK-ALIGNED-RV64-NEXT:    slli a6, a6, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a6, a5
+; CHECK-ALIGNED-RV64-NEXT:    or a3, a4, a3
+; CHECK-ALIGNED-RV64-NEXT:    lbu a4, 13(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a5, 12(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a6, 14(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a7, 15(a0)
+; CHECK-ALIGNED-RV64-NEXT:    slli a4, a4, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a4, a5
+; CHECK-ALIGNED-RV64-NEXT:    slli a6, a6, 16
+; CHECK-ALIGNED-RV64-NEXT:    slli a7, a7, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a5, a7, a6
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a5, a4
+; CHECK-ALIGNED-RV64-NEXT:    slli a4, a4, 32
+; CHECK-ALIGNED-RV64-NEXT:    or a3, a4, a3
+; CHECK-ALIGNED-RV64-NEXT:    lbu a4, 9(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a5, 8(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a6, 10(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a7, 11(a1)
+; CHECK-ALIGNED-RV64-NEXT:    slli a4, a4, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a4, a5
+; CHECK-ALIGNED-RV64-NEXT:    slli a6, a6, 16
+; CHECK-ALIGNED-RV64-NEXT:    slli a7, a7, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a5, a7, a6
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a5, a4
+; CHECK-ALIGNED-RV64-NEXT:    lbu a5, 13(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a6, 12(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a7, 14(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu t0, 15(a1)
+; CHECK-ALIGNED-RV64-NEXT:    slli a5, a5, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a5, a5, a6
+; CHECK-ALIGNED-RV64-NEXT:    slli a7, a7, 16
+; CHECK-ALIGNED-RV64-NEXT:    slli t0, t0, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a6, t0, a7
+; CHECK-ALIGNED-RV64-NEXT:    or a5, a6, a5
+; CHECK-ALIGNED-RV64-NEXT:    slli a5, a5, 32
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a5, a4
+; CHECK-ALIGNED-RV64-NEXT:    xor a3, a3, a4
+; CHECK-ALIGNED-RV64-NEXT:    lbu a4, 17(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a5, 16(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a6, 18(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a7, 19(a0)
+; CHECK-ALIGNED-RV64-NEXT:    slli a4, a4, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a4, a5
+; CHECK-ALIGNED-RV64-NEXT:    slli a6, a6, 16
+; CHECK-ALIGNED-RV64-NEXT:    slli a7, a7, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a5, a7, a6
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a5, a4
+; CHECK-ALIGNED-RV64-NEXT:    lbu a5, 21(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a6, 20(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a7, 22(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu t0, 23(a0)
+; CHECK-ALIGNED-RV64-NEXT:    slli a5, a5, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a5, a5, a6
+; CHECK-ALIGNED-RV64-NEXT:    slli a7, a7, 16
+; CHECK-ALIGNED-RV64-NEXT:    slli t0, t0, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a6, t0, a7
+; CHECK-ALIGNED-RV64-NEXT:    or a5, a6, a5
+; CHECK-ALIGNED-RV64-NEXT:    slli a5, a5, 32
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a5, a4
+; CHECK-ALIGNED-RV64-NEXT:    lbu a5, 17(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a6, 16(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a7, 18(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu t0, 19(a1)
+; CHECK-ALIGNED-RV64-NEXT:    slli a5, a5, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a5, a5, a6
+; CHECK-ALIGNED-RV64-NEXT:    slli a7, a7, 16
+; CHECK-ALIGNED-RV64-NEXT:    slli t0, t0, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a6, t0, a7
+; CHECK-ALIGNED-RV64-NEXT:    or a5, a6, a5
+; CHECK-ALIGNED-RV64-NEXT:    lbu a6, 21(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a7, 20(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu t0, 22(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu t1, 23(a1)
+; CHECK-ALIGNED-RV64-NEXT:    slli a6, a6, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a6, a6, a7
+; CHECK-ALIGNED-RV64-NEXT:    slli t0, t0, 16
+; CHECK-ALIGNED-RV64-NEXT:    slli t1, t1, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a7, t1, t0
+; CHECK-ALIGNED-RV64-NEXT:    or a6, a7, a6
+; CHECK-ALIGNED-RV64-NEXT:    slli a6, a6, 32
+; CHECK-ALIGNED-RV64-NEXT:    or a5, a6, a5
+; CHECK-ALIGNED-RV64-NEXT:    xor a4, a4, a5
+; CHECK-ALIGNED-RV64-NEXT:    lbu a5, 25(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a6, 24(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a7, 26(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu t0, 27(a0)
+; CHECK-ALIGNED-RV64-NEXT:    slli a5, a5, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a5, a5, a6
+; CHECK-ALIGNED-RV64-NEXT:    slli a7, a7, 16
+; CHECK-ALIGNED-RV64-NEXT:    slli t0, t0, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a6, t0, a7
+; CHECK-ALIGNED-RV64-NEXT:    or a5, a6, a5
+; CHECK-ALIGNED-RV64-NEXT:    lbu a6, 25(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a7, 24(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu t0, 26(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu t1, 27(a1)
+; CHECK-ALIGNED-RV64-NEXT:    slli a6, a6, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a6, a6, a7
+; CHECK-ALIGNED-RV64-NEXT:    slli t0, t0, 16
+; CHECK-ALIGNED-RV64-NEXT:    slli t1, t1, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a7, t1, t0
+; CHECK-ALIGNED-RV64-NEXT:    lbu t0, 29(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu t1, 28(a0)
+; CHECK-ALIGNED-RV64-NEXT:    or a6, a7, a6
+; CHECK-ALIGNED-RV64-NEXT:    xor a5, a5, a6
+; CHECK-ALIGNED-RV64-NEXT:    slli t0, t0, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a6, t0, t1
+; CHECK-ALIGNED-RV64-NEXT:    lbu a7, 29(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu t0, 28(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a0, 30(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a1, 30(a1)
+; CHECK-ALIGNED-RV64-NEXT:    slli a7, a7, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a7, a7, t0
+; CHECK-ALIGNED-RV64-NEXT:    xor a6, a6, a7
+; CHECK-ALIGNED-RV64-NEXT:    xor a0, a0, a1
+; CHECK-ALIGNED-RV64-NEXT:    or a2, a2, a3
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a4, a5
+; CHECK-ALIGNED-RV64-NEXT:    or a0, a6, a0
+; CHECK-ALIGNED-RV64-NEXT:    or a2, a2, a4
+; CHECK-ALIGNED-RV64-NEXT:    or a0, a2, a0
+; CHECK-ALIGNED-RV64-NEXT:    seqz a0, a0
+; CHECK-ALIGNED-RV64-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV32-LABEL: bcmp_size_31:
+; CHECK-UNALIGNED-RV32:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV32-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV32-NEXT:    lw a4, 4(a0)
+; CHECK-UNALIGNED-RV32-NEXT:    lw a5, 4(a1)
+; CHECK-UNALIGNED-RV32-NEXT:    lw a6, 8(a0)
+; CHECK-UNALIGNED-RV32-NEXT:    lw a7, 8(a1)
+; CHECK-UNALIGNED-RV32-NEXT:    lw t0, 12(a0)
+; CHECK-UNALIGNED-RV32-NEXT:    lw t1, 12(a1)
+; CHECK-UNALIGNED-RV32-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV32-NEXT:    xor a4, a4, a5
+; CHECK-UNALIGNED-RV32-NEXT:    xor a3, a6, a7
+; CHECK-UNALIGNED-RV32-NEXT:    xor a5, t0, t1
+; CHECK-UNALIGNED-RV32-NEXT:    lw a6, 16(a0)
+; CHECK-UNALIGNED-RV32-NEXT:    lw a7, 16(a1)
+; CHECK-UNALIGNED-RV32-NEXT:    lw t0, 20(a0)
+; CHECK-UNALIGNED-RV32-NEXT:    lw t1, 20(a1)
+; CHECK-UNALIGNED-RV32-NEXT:    lw t2, 24(a0)
+; CHECK-UNALIGNED-RV32-NEXT:    lw t3, 24(a1)
+; CHECK-UNALIGNED-RV32-NEXT:    lw a0, 27(a0)
+; CHECK-UNALIGNED-RV32-NEXT:    lw a1, 27(a1)
+; CHECK-UNALIGNED-RV32-NEXT:    xor a6, a6, a7
+; CHECK-UNALIGNED-RV32-NEXT:    xor a7, t0, t1
+; CHECK-UNALIGNED-RV32-NEXT:    xor t0, t2, t3
+; CHECK-UNALIGNED-RV32-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV32-NEXT:    or a2, a2, a4
+; CHECK-UNALIGNED-RV32-NEXT:    or a3, a3, a5
+; CHECK-UNALIGNED-RV32-NEXT:    or a1, a6, a7
+; CHECK-UNALIGNED-RV32-NEXT:    or a0, t0, a0
+; CHECK-UNALIGNED-RV32-NEXT:    or a2, a2, a3
+; CHECK-UNALIGNED-RV32-NEXT:    or a0, a1, a0
+; CHECK-UNALIGNED-RV32-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV32-NEXT:    seqz a0, a0
+; CHECK-UNALIGNED-RV32-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-LABEL: bcmp_size_31:
+; CHECK-UNALIGNED-RV64:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-NEXT:    ld a2, 0(a0)
+; CHECK-UNALIGNED-RV64-NEXT:    ld a3, 0(a1)
+; CHECK-UNALIGNED-RV64-NEXT:    ld a4, 8(a0)
+; CHECK-UNALIGNED-RV64-NEXT:    ld a5, 8(a1)
+; CHECK-UNALIGNED-RV64-NEXT:    ld a6, 16(a0)
+; CHECK-UNALIGNED-RV64-NEXT:    ld a7, 16(a1)
+; CHECK-UNALIGNED-RV64-NEXT:    ld a0, 23(a0)
+; CHECK-UNALIGNED-RV64-NEXT:    ld a1, 23(a1)
+; CHECK-UNALIGNED-RV64-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-NEXT:    xor a4, a4, a5
+; CHECK-UNALIGNED-RV64-NEXT:    xor a3, a6, a7
+; CHECK-UNALIGNED-RV64-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-NEXT:    or a2, a2, a4
+; CHECK-UNALIGNED-RV64-NEXT:    or a0, a3, a0
+; CHECK-UNALIGNED-RV64-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-NEXT:    seqz a0, a0
+; CHECK-UNALIGNED-RV64-NEXT:    ret
+entry:
+  %bcmp = call i32 @bcmp(i8* %s1, i8* %s2, iXLen 31)
+  %ret = icmp eq i32 %bcmp, 0
+  ret i1 %ret
+}
+
+define i1 @memcmp_size_15(i8* %s1, i8* %s2) {
+; CHECK-ALIGNED-RV32-LABEL: memcmp_size_15:
+; CHECK-ALIGNED-RV32:       # %bb.0: # %entry
+; CHECK-ALIGNED-RV32-NEXT:    lbu a2, 1(a0)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a3, 0(a0)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a4, 2(a0)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a5, 3(a0)
+; CHECK-ALIGNED-RV32-NEXT:    slli a2, a2, 8
+; CHECK-ALIGNED-RV32-NEXT:    or a2, a2, a3
+; CHECK-ALIGNED-RV32-NEXT:    slli a4, a4, 16
+; CHECK-ALIGNED-RV32-NEXT:    slli a5, a5, 24
+; CHECK-ALIGNED-RV32-NEXT:    or a4, a5, a4
+; CHECK-ALIGNED-RV32-NEXT:    or a2, a4, a2
+; CHECK-ALIGNED-RV32-NEXT:    lbu a3, 1(a1)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a4, 0(a1)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a5, 2(a1)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a6, 3(a1)
+; CHECK-ALIGNED-RV32-NEXT:    slli a3, a3, 8
+; CHECK-ALIGNED-RV32-NEXT:    or a3, a3, a4
+; CHECK-ALIGNED-RV32-NEXT:    slli a5, a5, 16
+; CHECK-ALIGNED-RV32-NEXT:    slli a6, a6, 24
+; CHECK-ALIGNED-RV32-NEXT:    or a4, a6, a5
+; CHECK-ALIGNED-RV32-NEXT:    or a3, a4, a3
+; CHECK-ALIGNED-RV32-NEXT:    xor a2, a2, a3
+; CHECK-ALIGNED-RV32-NEXT:    lbu a3, 5(a0)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a4, 4(a0)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a5, 6(a0)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a6, 7(a0)
+; CHECK-ALIGNED-RV32-NEXT:    slli a3, a3, 8
+; CHECK-ALIGNED-RV32-NEXT:    or a3, a3, a4
+; CHECK-ALIGNED-RV32-NEXT:    slli a5, a5, 16
+; CHECK-ALIGNED-RV32-NEXT:    slli a6, a6, 24
+; CHECK-ALIGNED-RV32-NEXT:    or a4, a6, a5
+; CHECK-ALIGNED-RV32-NEXT:    or a3, a4, a3
+; CHECK-ALIGNED-RV32-NEXT:    lbu a4, 5(a1)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a5, 4(a1)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a6, 6(a1)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a7, 7(a1)
+; CHECK-ALIGNED-RV32-NEXT:    slli a4, a4, 8
+; CHECK-ALIGNED-RV32-NEXT:    or a4, a4, a5
+; CHECK-ALIGNED-RV32-NEXT:    slli a6, a6, 16
+; CHECK-ALIGNED-RV32-NEXT:    slli a7, a7, 24
+; CHECK-ALIGNED-RV32-NEXT:    or a5, a7, a6
+; CHECK-ALIGNED-RV32-NEXT:    or a4, a5, a4
+; CHECK-ALIGNED-RV32-NEXT:    xor a3, a3, a4
+; CHECK-ALIGNED-RV32-NEXT:    lbu a4, 9(a0)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a5, 8(a0)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a6, 10(a0)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a7, 11(a0)
+; CHECK-ALIGNED-RV32-NEXT:    slli a4, a4, 8
+; CHECK-ALIGNED-RV32-NEXT:    or a4, a4, a5
+; CHECK-ALIGNED-RV32-NEXT:    slli a6, a6, 16
+; CHECK-ALIGNED-RV32-NEXT:    slli a7, a7, 24
+; CHECK-ALIGNED-RV32-NEXT:    or a5, a7, a6
+; CHECK-ALIGNED-RV32-NEXT:    or a4, a5, a4
+; CHECK-ALIGNED-RV32-NEXT:    lbu a5, 9(a1)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a6, 8(a1)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a7, 10(a1)
+; CHECK-ALIGNED-RV32-NEXT:    lbu t0, 11(a1)
+; CHECK-ALIGNED-RV32-NEXT:    slli a5, a5, 8
+; CHECK-ALIGNED-RV32-NEXT:    or a5, a5, a6
+; CHECK-ALIGNED-RV32-NEXT:    slli a7, a7, 16
+; CHECK-ALIGNED-RV32-NEXT:    slli t0, t0, 24
+; CHECK-ALIGNED-RV32-NEXT:    or a6, t0, a7
+; CHECK-ALIGNED-RV32-NEXT:    lbu a7, 13(a0)
+; CHECK-ALIGNED-RV32-NEXT:    lbu t0, 12(a0)
+; CHECK-ALIGNED-RV32-NEXT:    or a5, a6, a5
+; CHECK-ALIGNED-RV32-NEXT:    xor a4, a4, a5
+; CHECK-ALIGNED-RV32-NEXT:    slli a7, a7, 8
+; CHECK-ALIGNED-RV32-NEXT:    or a5, a7, t0
+; CHECK-ALIGNED-RV32-NEXT:    lbu a6, 13(a1)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a7, 12(a1)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a0, 14(a0)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a1, 14(a1)
+; CHECK-ALIGNED-RV32-NEXT:    slli a6, a6, 8
+; CHECK-ALIGNED-RV32-NEXT:    or a6, a6, a7
+; CHECK-ALIGNED-RV32-NEXT:    xor a5, a5, a6
+; CHECK-ALIGNED-RV32-NEXT:    xor a0, a0, a1
+; CHECK-ALIGNED-RV32-NEXT:    or a2, a2, a3
+; CHECK-ALIGNED-RV32-NEXT:    or a4, a4, a5
+; CHECK-ALIGNED-RV32-NEXT:    or a2, a2, a4
+; CHECK-ALIGNED-RV32-NEXT:    or a0, a2, a0
+; CHECK-ALIGNED-RV32-NEXT:    seqz a0, a0
+; CHECK-ALIGNED-RV32-NEXT:    ret
+;
+; CHECK-ALIGNED-RV64-LABEL: memcmp_size_15:
+; CHECK-ALIGNED-RV64:       # %bb.0: # %entry
+; CHECK-ALIGNED-RV64-NEXT:    lbu a2, 1(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a3, 0(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a4, 2(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a5, 3(a0)
+; CHECK-ALIGNED-RV64-NEXT:    slli a2, a2, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a2, a2, a3
+; CHECK-ALIGNED-RV64-NEXT:    slli a4, a4, 16
+; CHECK-ALIGNED-RV64-NEXT:    slli a5, a5, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a5, a4
+; CHECK-ALIGNED-RV64-NEXT:    or a2, a4, a2
+; CHECK-ALIGNED-RV64-NEXT:    lbu a3, 5(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a4, 4(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a5, 6(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a6, 7(a0)
+; CHECK-ALIGNED-RV64-NEXT:    slli a3, a3, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a3, a3, a4
+; CHECK-ALIGNED-RV64-NEXT:    slli a5, a5, 16
+; CHECK-ALIGNED-RV64-NEXT:    slli a6, a6, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a6, a5
+; CHECK-ALIGNED-RV64-NEXT:    or a3, a4, a3
+; CHECK-ALIGNED-RV64-NEXT:    slli a3, a3, 32
+; CHECK-ALIGNED-RV64-NEXT:    or a2, a3, a2
+; CHECK-ALIGNED-RV64-NEXT:    lbu a3, 1(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a4, 0(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a5, 2(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a6, 3(a1)
+; CHECK-ALIGNED-RV64-NEXT:    slli a3, a3, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a3, a3, a4
+; CHECK-ALIGNED-RV64-NEXT:    slli a5, a5, 16
+; CHECK-ALIGNED-RV64-NEXT:    slli a6, a6, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a6, a5
+; CHECK-ALIGNED-RV64-NEXT:    or a3, a4, a3
+; CHECK-ALIGNED-RV64-NEXT:    lbu a4, 5(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a5, 4(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a6, 6(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a7, 7(a1)
+; CHECK-ALIGNED-RV64-NEXT:    slli a4, a4, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a4, a5
+; CHECK-ALIGNED-RV64-NEXT:    slli a6, a6, 16
+; CHECK-ALIGNED-RV64-NEXT:    slli a7, a7, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a5, a7, a6
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a5, a4
+; CHECK-ALIGNED-RV64-NEXT:    slli a4, a4, 32
+; CHECK-ALIGNED-RV64-NEXT:    or a3, a4, a3
+; CHECK-ALIGNED-RV64-NEXT:    xor a2, a2, a3
+; CHECK-ALIGNED-RV64-NEXT:    lbu a3, 9(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a4, 8(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a5, 10(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a6, 11(a0)
+; CHECK-ALIGNED-RV64-NEXT:    slli a3, a3, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a3, a3, a4
+; CHECK-ALIGNED-RV64-NEXT:    slli a5, a5, 16
+; CHECK-ALIGNED-RV64-NEXT:    slli a6, a6, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a6, a5
+; CHECK-ALIGNED-RV64-NEXT:    or a3, a4, a3
+; CHECK-ALIGNED-RV64-NEXT:    lbu a4, 9(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a5, 8(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a6, 10(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a7, 11(a1)
+; CHECK-ALIGNED-RV64-NEXT:    slli a4, a4, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a4, a5
+; CHECK-ALIGNED-RV64-NEXT:    slli a6, a6, 16
+; CHECK-ALIGNED-RV64-NEXT:    slli a7, a7, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a5, a7, a6
+; CHECK-ALIGNED-RV64-NEXT:    lbu a6, 13(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a7, 12(a0)
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a5, a4
+; CHECK-ALIGNED-RV64-NEXT:    xor a3, a3, a4
+; CHECK-ALIGNED-RV64-NEXT:    slli a6, a6, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a6, a7
+; CHECK-ALIGNED-RV64-NEXT:    lbu a5, 13(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a6, 12(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a0, 14(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a1, 14(a1)
+; CHECK-ALIGNED-RV64-NEXT:    slli a5, a5, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a5, a5, a6
+; CHECK-ALIGNED-RV64-NEXT:    xor a4, a4, a5
+; CHECK-ALIGNED-RV64-NEXT:    xor a0, a0, a1
+; CHECK-ALIGNED-RV64-NEXT:    or a0, a4, a0
+; CHECK-ALIGNED-RV64-NEXT:    or a0, a3, a0
+; CHECK-ALIGNED-RV64-NEXT:    or a0, a2, a0
+; CHECK-ALIGNED-RV64-NEXT:    seqz a0, a0
+; CHECK-ALIGNED-RV64-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV32-LABEL: memcmp_size_15:
+; CHECK-UNALIGNED-RV32:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV32-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV32-NEXT:    lw a4, 4(a0)
+; CHECK-UNALIGNED-RV32-NEXT:    lw a5, 4(a1)
+; CHECK-UNALIGNED-RV32-NEXT:    lw a6, 8(a0)
+; CHECK-UNALIGNED-RV32-NEXT:    lw a7, 8(a1)
+; CHECK-UNALIGNED-RV32-NEXT:    lw a0, 11(a0)
+; CHECK-UNALIGNED-RV32-NEXT:    lw a1, 11(a1)
+; CHECK-UNALIGNED-RV32-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV32-NEXT:    xor a4, a4, a5
+; CHECK-UNALIGNED-RV32-NEXT:    xor a3, a6, a7
+; CHECK-UNALIGNED-RV32-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV32-NEXT:    or a2, a2, a4
+; CHECK-UNALIGNED-RV32-NEXT:    or a0, a3, a0
+; CHECK-UNALIGNED-RV32-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV32-NEXT:    seqz a0, a0
+; CHECK-UNALIGNED-RV32-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-LABEL: memcmp_size_15:
+; CHECK-UNALIGNED-RV64:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-NEXT:    ld a2, 0(a0)
+; CHECK-UNALIGNED-RV64-NEXT:    ld a3, 0(a1)
+; CHECK-UNALIGNED-RV64-NEXT:    ld a0, 7(a0)
+; CHECK-UNALIGNED-RV64-NEXT:    ld a1, 7(a1)
+; CHECK-UNALIGNED-RV64-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-NEXT:    seqz a0, a0
+; CHECK-UNALIGNED-RV64-NEXT:    ret
+entry:
+  %memcmp = call i32 @memcmp(i8* %s1, i8* %s2, iXLen 15)
+  %ret = icmp eq i32 %memcmp, 0
+  ret i1 %ret
+}
+
+define i1 @memcmp_size_31(i8* %s1, i8* %s2) {
+; CHECK-ALIGNED-RV32-LABEL: memcmp_size_31:
+; CHECK-ALIGNED-RV32:       # %bb.0: # %entry
+; CHECK-ALIGNED-RV32-NEXT:    addi sp, sp, -16
+; CHECK-ALIGNED-RV32-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-ALIGNED-RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; CHECK-ALIGNED-RV32-NEXT:    .cfi_offset ra, -4
+; CHECK-ALIGNED-RV32-NEXT:    li a2, 31
+; CHECK-ALIGNED-RV32-NEXT:    call memcmp
+; CHECK-ALIGNED-RV32-NEXT:    seqz a0, a0
+; CHECK-ALIGNED-RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; CHECK-ALIGNED-RV32-NEXT:    addi sp, sp, 16
+; CHECK-ALIGNED-RV32-NEXT:    ret
+;
+; CHECK-ALIGNED-RV64-LABEL: memcmp_size_31:
+; CHECK-ALIGNED-RV64:       # %bb.0: # %entry
+; CHECK-ALIGNED-RV64-NEXT:    lbu a2, 1(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a3, 0(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a4, 2(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a5, 3(a0)
+; CHECK-ALIGNED-RV64-NEXT:    slli a2, a2, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a2, a2, a3
+; CHECK-ALIGNED-RV64-NEXT:    slli a4, a4, 16
+; CHECK-ALIGNED-RV64-NEXT:    slli a5, a5, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a5, a4
+; CHECK-ALIGNED-RV64-NEXT:    or a2, a4, a2
+; CHECK-ALIGNED-RV64-NEXT:    lbu a3, 5(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a4, 4(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a5, 6(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a6, 7(a0)
+; CHECK-ALIGNED-RV64-NEXT:    slli a3, a3, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a3, a3, a4
+; CHECK-ALIGNED-RV64-NEXT:    slli a5, a5, 16
+; CHECK-ALIGNED-RV64-NEXT:    slli a6, a6, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a6, a5
+; CHECK-ALIGNED-RV64-NEXT:    or a3, a4, a3
+; CHECK-ALIGNED-RV64-NEXT:    slli a3, a3, 32
+; CHECK-ALIGNED-RV64-NEXT:    or a2, a3, a2
+; CHECK-ALIGNED-RV64-NEXT:    lbu a3, 1(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a4, 0(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a5, 2(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a6, 3(a1)
+; CHECK-ALIGNED-RV64-NEXT:    slli a3, a3, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a3, a3, a4
+; CHECK-ALIGNED-RV64-NEXT:    slli a5, a5, 16
+; CHECK-ALIGNED-RV64-NEXT:    slli a6, a6, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a6, a5
+; CHECK-ALIGNED-RV64-NEXT:    or a3, a4, a3
+; CHECK-ALIGNED-RV64-NEXT:    lbu a4, 5(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a5, 4(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a6, 6(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a7, 7(a1)
+; CHECK-ALIGNED-RV64-NEXT:    slli a4, a4, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a4, a5
+; CHECK-ALIGNED-RV64-NEXT:    slli a6, a6, 16
+; CHECK-ALIGNED-RV64-NEXT:    slli a7, a7, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a5, a7, a6
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a5, a4
+; CHECK-ALIGNED-RV64-NEXT:    slli a4, a4, 32
+; CHECK-ALIGNED-RV64-NEXT:    or a3, a4, a3
+; CHECK-ALIGNED-RV64-NEXT:    xor a2, a2, a3
+; CHECK-ALIGNED-RV64-NEXT:    lbu a3, 9(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a4, 8(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a5, 10(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a6, 11(a0)
+; CHECK-ALIGNED-RV64-NEXT:    slli a3, a3, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a3, a3, a4
+; CHECK-ALIGNED-RV64-NEXT:    slli a5, a5, 16
+; CHECK-ALIGNED-RV64-NEXT:    slli a6, a6, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a6, a5
+; CHECK-ALIGNED-RV64-NEXT:    or a3, a4, a3
+; CHECK-ALIGNED-RV64-NEXT:    lbu a4, 13(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a5, 12(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a6, 14(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a7, 15(a0)
+; CHECK-ALIGNED-RV64-NEXT:    slli a4, a4, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a4, a5
+; CHECK-ALIGNED-RV64-NEXT:    slli a6, a6, 16
+; CHECK-ALIGNED-RV64-NEXT:    slli a7, a7, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a5, a7, a6
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a5, a4
+; CHECK-ALIGNED-RV64-NEXT:    slli a4, a4, 32
+; CHECK-ALIGNED-RV64-NEXT:    or a3, a4, a3
+; CHECK-ALIGNED-RV64-NEXT:    lbu a4, 9(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a5, 8(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a6, 10(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a7, 11(a1)
+; CHECK-ALIGNED-RV64-NEXT:    slli a4, a4, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a4, a5
+; CHECK-ALIGNED-RV64-NEXT:    slli a6, a6, 16
+; CHECK-ALIGNED-RV64-NEXT:    slli a7, a7, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a5, a7, a6
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a5, a4
+; CHECK-ALIGNED-RV64-NEXT:    lbu a5, 13(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a6, 12(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a7, 14(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu t0, 15(a1)
+; CHECK-ALIGNED-RV64-NEXT:    slli a5, a5, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a5, a5, a6
+; CHECK-ALIGNED-RV64-NEXT:    slli a7, a7, 16
+; CHECK-ALIGNED-RV64-NEXT:    slli t0, t0, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a6, t0, a7
+; CHECK-ALIGNED-RV64-NEXT:    or a5, a6, a5
+; CHECK-ALIGNED-RV64-NEXT:    slli a5, a5, 32
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a5, a4
+; CHECK-ALIGNED-RV64-NEXT:    xor a3, a3, a4
+; CHECK-ALIGNED-RV64-NEXT:    lbu a4, 17(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a5, 16(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a6, 18(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a7, 19(a0)
+; CHECK-ALIGNED-RV64-NEXT:    slli a4, a4, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a4, a5
+; CHECK-ALIGNED-RV64-NEXT:    slli a6, a6, 16
+; CHECK-ALIGNED-RV64-NEXT:    slli a7, a7, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a5, a7, a6
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a5, a4
+; CHECK-ALIGNED-RV64-NEXT:    lbu a5, 21(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a6, 20(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a7, 22(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu t0, 23(a0)
+; CHECK-ALIGNED-RV64-NEXT:    slli a5, a5, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a5, a5, a6
+; CHECK-ALIGNED-RV64-NEXT:    slli a7, a7, 16
+; CHECK-ALIGNED-RV64-NEXT:    slli t0, t0, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a6, t0, a7
+; CHECK-ALIGNED-RV64-NEXT:    or a5, a6, a5
+; CHECK-ALIGNED-RV64-NEXT:    slli a5, a5, 32
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a5, a4
+; CHECK-ALIGNED-RV64-NEXT:    lbu a5, 17(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a6, 16(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a7, 18(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu t0, 19(a1)
+; CHECK-ALIGNED-RV64-NEXT:    slli a5, a5, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a5, a5, a6
+; CHECK-ALIGNED-RV64-NEXT:    slli a7, a7, 16
+; CHECK-ALIGNED-RV64-NEXT:    slli t0, t0, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a6, t0, a7
+; CHECK-ALIGNED-RV64-NEXT:    or a5, a6, a5
+; CHECK-ALIGNED-RV64-NEXT:    lbu a6, 21(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a7, 20(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu t0, 22(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu t1, 23(a1)
+; CHECK-ALIGNED-RV64-NEXT:    slli a6, a6, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a6, a6, a7
+; CHECK-ALIGNED-RV64-NEXT:    slli t0, t0, 16
+; CHECK-ALIGNED-RV64-NEXT:    slli t1, t1, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a7, t1, t0
+; CHECK-ALIGNED-RV64-NEXT:    or a6, a7, a6
+; CHECK-ALIGNED-RV64-NEXT:    slli a6, a6, 32
+; CHECK-ALIGNED-RV64-NEXT:    or a5, a6, a5
+; CHECK-ALIGNED-RV64-NEXT:    xor a4, a4, a5
+; CHECK-ALIGNED-RV64-NEXT:    lbu a5, 25(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a6, 24(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a7, 26(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu t0, 27(a0)
+; CHECK-ALIGNED-RV64-NEXT:    slli a5, a5, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a5, a5, a6
+; CHECK-ALIGNED-RV64-NEXT:    slli a7, a7, 16
+; CHECK-ALIGNED-RV64-NEXT:    slli t0, t0, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a6, t0, a7
+; CHECK-ALIGNED-RV64-NEXT:    or a5, a6, a5
+; CHECK-ALIGNED-RV64-NEXT:    lbu a6, 25(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a7, 24(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu t0, 26(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu t1, 27(a1)
+; CHECK-ALIGNED-RV64-NEXT:    slli a6, a6, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a6, a6, a7
+; CHECK-ALIGNED-RV64-NEXT:    slli t0, t0, 16
+; CHECK-ALIGNED-RV64-NEXT:    slli t1, t1, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a7, t1, t0
+; CHECK-ALIGNED-RV64-NEXT:    lbu t0, 29(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu t1, 28(a0)
+; CHECK-ALIGNED-RV64-NEXT:    or a6, a7, a6
+; CHECK-ALIGNED-RV64-NEXT:    xor a5, a5, a6
+; CHECK-ALIGNED-RV64-NEXT:    slli t0, t0, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a6, t0, t1
+; CHECK-ALIGNED-RV64-NEXT:    lbu a7, 29(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu t0, 28(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a0, 30(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a1, 30(a1)
+; CHECK-ALIGNED-RV64-NEXT:    slli a7, a7, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a7, a7, t0
+; CHECK-ALIGNED-RV64-NEXT:    xor a6, a6, a7
+; CHECK-ALIGNED-RV64-NEXT:    xor a0, a0, a1
+; CHECK-ALIGNED-RV64-NEXT:    or a2, a2, a3
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a4, a5
+; CHECK-ALIGNED-RV64-NEXT:    or a0, a6, a0
+; CHECK-ALIGNED-RV64-NEXT:    or a2, a2, a4
+; CHECK-ALIGNED-RV64-NEXT:    or a0, a2, a0
+; CHECK-ALIGNED-RV64-NEXT:    seqz a0, a0
+; CHECK-ALIGNED-RV64-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV32-LABEL: memcmp_size_31:
+; CHECK-UNALIGNED-RV32:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV32-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV32-NEXT:    lw a4, 4(a0)
+; CHECK-UNALIGNED-RV32-NEXT:    lw a5, 4(a1)
+; CHECK-UNALIGNED-RV32-NEXT:    lw a6, 8(a0)
+; CHECK-UNALIGNED-RV32-NEXT:    lw a7, 8(a1)
+; CHECK-UNALIGNED-RV32-NEXT:    lw t0, 12(a0)
+; CHECK-UNALIGNED-RV32-NEXT:    lw t1, 12(a1)
+; CHECK-UNALIGNED-RV32-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV32-NEXT:    xor a4, a4, a5
+; CHECK-UNALIGNED-RV32-NEXT:    xor a3, a6, a7
+; CHECK-UNALIGNED-RV32-NEXT:    xor a5, t0, t1
+; CHECK-UNALIGNED-RV32-NEXT:    lw a6, 16(a0)
+; CHECK-UNALIGNED-RV32-NEXT:    lw a7, 16(a1)
+; CHECK-UNALIGNED-RV32-NEXT:    lw t0, 20(a0)
+; CHECK-UNALIGNED-RV32-NEXT:    lw t1, 20(a1)
+; CHECK-UNALIGNED-RV32-NEXT:    lw t2, 24(a0)
+; CHECK-UNALIGNED-RV32-NEXT:    lw t3, 24(a1)
+; CHECK-UNALIGNED-RV32-NEXT:    lw a0, 27(a0)
+; CHECK-UNALIGNED-RV32-NEXT:    lw a1, 27(a1)
+; CHECK-UNALIGNED-RV32-NEXT:    xor a6, a6, a7
+; CHECK-UNALIGNED-RV32-NEXT:    xor a7, t0, t1
+; CHECK-UNALIGNED-RV32-NEXT:    xor t0, t2, t3
+; CHECK-UNALIGNED-RV32-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV32-NEXT:    or a2, a2, a4
+; CHECK-UNALIGNED-RV32-NEXT:    or a3, a3, a5
+; CHECK-UNALIGNED-RV32-NEXT:    or a1, a6, a7
+; CHECK-UNALIGNED-RV32-NEXT:    or a0, t0, a0
+; CHECK-UNALIGNED-RV32-NEXT:    or a2, a2, a3
+; CHECK-UNALIGNED-RV32-NEXT:    or a0, a1, a0
+; CHECK-UNALIGNED-RV32-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV32-NEXT:    seqz a0, a0
+; CHECK-UNALIGNED-RV32-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-LABEL: memcmp_size_31:
+; CHECK-UNALIGNED-RV64:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-NEXT:    ld a2, 0(a0)
+; CHECK-UNALIGNED-RV64-NEXT:    ld a3, 0(a1)
+; CHECK-UNALIGNED-RV64-NEXT:    ld a4, 8(a0)
+; CHECK-UNALIGNED-RV64-NEXT:    ld a5, 8(a1)
+; CHECK-UNALIGNED-RV64-NEXT:    ld a6, 16(a0)
+; CHECK-UNALIGNED-RV64-NEXT:    ld a7, 16(a1)
+; CHECK-UNALIGNED-RV64-NEXT:    ld a0, 23(a0)
+; CHECK-UNALIGNED-RV64-NEXT:    ld a1, 23(a1)
+; CHECK-UNALIGNED-RV64-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-NEXT:    xor a4, a4, a5
+; CHECK-UNALIGNED-RV64-NEXT:    xor a3, a6, a7
+; CHECK-UNALIGNED-RV64-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-NEXT:    or a2, a2, a4
+; CHECK-UNALIGNED-RV64-NEXT:    or a0, a3, a0
+; CHECK-UNALIGNED-RV64-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-NEXT:    seqz a0, a0
+; CHECK-UNALIGNED-RV64-NEXT:    ret
+entry:
+  %memcmp = call i32 @memcmp(i8* %s1, i8* %s2, iXLen 31)
+  %ret = icmp eq i32 %memcmp, 0
+  ret i1 %ret
+}

>From 2caea13ab7795c32476a7102028b9ac0a3ebf9b6 Mon Sep 17 00:00:00 2001
From: Wang Pengcheng <wangpengcheng.pp at bytedance.com>
Date: Fri, 6 Sep 2024 18:07:49 +0800
Subject: [PATCH 2/5] Fix copy-paste mistake

Created using spr 1.3.6-beta.1
---
 llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index ad532aadc83266..ae5ebf16dc6da0 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -2119,7 +2119,7 @@ RISCVTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
   TTI::MemCmpExpansionOptions Options;
   // FIXME: Vector haven't been tested.
   Options.AllowOverlappingLoads =
-      (ST->enableUnalignedScalarMem() || ST->enableUnalignedScalarMem());
+      (ST->enableUnalignedScalarMem() || ST->enableUnalignedVectorMem());
   Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
   Options.NumLoadsPerBlock = Options.MaxNumLoads;
   if (ST->is64Bit())

>From a96e1aaf9a4cbe8e8dd09f4f4b1260b5c63541df Mon Sep 17 00:00:00 2001
From: Wang Pengcheng <wangpengcheng.pp at bytedance.com>
Date: Mon, 9 Sep 2024 21:44:22 +0800
Subject: [PATCH 3/5] Don't add 5/6 to AllowedTailExpansions for RV32

Created using spr 1.3.6-beta.1
---
 llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index ae5ebf16dc6da0..f2fe52bb939e07 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -2125,6 +2125,8 @@ RISCVTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
   if (ST->is64Bit())
     Options.LoadSizes.push_back(8);
   llvm::append_range(Options.LoadSizes, ArrayRef({4, 2, 1}));
-  Options.AllowedTailExpansions = {3, 5, 6};
+  Options.AllowedTailExpansions = {3};
+  if (ST->is64Bit())
+    llvm::append_range(Options.AllowedTailExpansions, ArrayRef{5, 6});
   return Options;
 }

>From 86b823d7f70d847b140af48cd1c0c7a461897d77 Mon Sep 17 00:00:00 2001
From: Wang Pengcheng <wangpengcheng.pp at bytedance.com>
Date: Mon, 9 Sep 2024 21:52:13 +0800
Subject: [PATCH 4/5] Remove AllowedTailExpansions

Created using spr 1.3.6-beta.1
---
 llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index f2fe52bb939e07..2ec4483f072d5a 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -2125,8 +2125,5 @@ RISCVTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
   if (ST->is64Bit())
     Options.LoadSizes.push_back(8);
   llvm::append_range(Options.LoadSizes, ArrayRef({4, 2, 1}));
-  Options.AllowedTailExpansions = {3};
-  if (ST->is64Bit())
-    llvm::append_range(Options.AllowedTailExpansions, ArrayRef{5, 6});
   return Options;
 }

>From e709f8d43874d8370ec39a29e8e05e6a4f612da0 Mon Sep 17 00:00:00 2001
From: Wang Pengcheng <wangpengcheng.pp at bytedance.com>
Date: Fri, 13 Sep 2024 13:13:09 +0800
Subject: [PATCH 5/5] Explicitly set the LoadSizes

Created using spr 1.3.6-beta.1
---
 llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 2ec4483f072d5a..9bc262c8a06a33 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -2123,7 +2123,8 @@ RISCVTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
   Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
   Options.NumLoadsPerBlock = Options.MaxNumLoads;
   if (ST->is64Bit())
-    Options.LoadSizes.push_back(8);
-  llvm::append_range(Options.LoadSizes, ArrayRef({4, 2, 1}));
+    Options.LoadSizes = {8, 4, 2, 1};
+  else
+    Options.LoadSizes = {4, 2, 1};
   return Options;
 }



More information about the llvm-branch-commits mailing list