[llvm] ca29c63 - [RISCV] Support non-power-of-2 types when expanding memcmp

via llvm-commits llvm-commits at lists.llvm.org
Wed Jun 18 01:11:23 PDT 2025


Author: Pengcheng Wang
Date: 2025-06-18T16:11:18+08:00
New Revision: ca29c632f06fc0e02ebbbb9fbdc73e3abd6b096b

URL: https://github.com/llvm/llvm-project/commit/ca29c632f06fc0e02ebbbb9fbdc73e3abd6b096b
DIFF: https://github.com/llvm/llvm-project/commit/ca29c632f06fc0e02ebbbb9fbdc73e3abd6b096b.diff

LOG: [RISCV] Support non-power-of-2 types when expanding memcmp

We can convert non-power-of-2 types into extended value types,
which will then be widened.
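
For illustration only (not part of this patch), a minimal C++ sketch of
the kind of call that benefits; the function name and the 15-byte
length are assumptions chosen to exercise a non-power-of-2 size:

    // Hypothetical user code, not from this commit. A 15-byte equality
    // check goes through the bcmp/memcmp zero-compare expansion; when
    // the V extension and fast unaligned vector access are available,
    // it can now lower to a single vector compare (vsetivli, vle8.v,
    // vmsne.vv, vcpop.m, snez) instead of overlapping scalar loads,
    // as the updated memcmp.ll checks show.
    #include <cstring>

    bool equal15(const void *a, const void *b) {
      return std::memcmp(a, b, 15) == 0;
    }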

Reviewers: lukel97

Reviewed By: lukel97

Pull Request: https://github.com/llvm/llvm-project/pull/114971

Added: 
    llvm/test/CodeGen/RISCV/icmp-non-byte-sized.ll

Modified: 
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
    llvm/test/CodeGen/RISCV/memcmp-optsize.ll
    llvm/test/CodeGen/RISCV/memcmp.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index e670567bd1844..b8ef221742a26 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -16190,10 +16190,6 @@ combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC,
     return SDValue();
 
   unsigned OpSize = OpVT.getSizeInBits();
-  // TODO: Support non-power-of-2 types.
-  if (!isPowerOf2_32(OpSize))
-    return SDValue();
-
   // The size should be larger than XLen and smaller than the maximum vector
   // size.
   if (OpSize <= Subtarget.getXLen() ||
@@ -16214,14 +16210,25 @@ combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC,
           Attribute::NoImplicitFloat))
     return SDValue();
 
+  // Bail out for non-byte-sized types.
+  if (!OpVT.isByteSized())
+    return SDValue();
+
   unsigned VecSize = OpSize / 8;
-  EVT VecVT = MVT::getVectorVT(MVT::i8, VecSize);
-  EVT CmpVT = MVT::getVectorVT(MVT::i1, VecSize);
+  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, VecSize);
+  EVT CmpVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, VecSize);
 
   SDValue VecX = DAG.getBitcast(VecVT, X);
   SDValue VecY = DAG.getBitcast(VecVT, Y);
-  SDValue Cmp = DAG.getSetCC(DL, CmpVT, VecX, VecY, ISD::SETNE);
-  return DAG.getSetCC(DL, VT, DAG.getNode(ISD::VECREDUCE_OR, DL, XLenVT, Cmp),
+  SDValue Mask = DAG.getAllOnesConstant(DL, CmpVT);
+  SDValue VL = DAG.getConstant(VecSize, DL, XLenVT);
+
+  SDValue Cmp = DAG.getNode(ISD::VP_SETCC, DL, CmpVT, VecX, VecY,
+                            DAG.getCondCode(ISD::SETNE), Mask, VL);
+  return DAG.getSetCC(DL, VT,
+                      DAG.getNode(ISD::VP_REDUCE_OR, DL, XLenVT,
+                                  DAG.getConstant(0, DL, XLenVT), Cmp, Mask,
+                                  VL),
                       DAG.getConstant(0, DL, XLenVT), CC);
 }
 

diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index aadda2ce85529..46e30ce4c18a9 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -2985,20 +2985,13 @@ RISCVTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
   }
 
   if (IsZeroCmp && ST->hasVInstructions()) {
-    unsigned RealMinVLen = ST->getRealMinVLen();
-    // Support Fractional LMULs if the lengths are larger than XLen.
-    // TODO: Support non-power-of-2 types.
-    for (unsigned FLMUL = 8; FLMUL >= 2; FLMUL /= 2) {
-      unsigned Len = RealMinVLen / FLMUL;
-      if (Len > ST->getXLen())
-        Options.LoadSizes.insert(Options.LoadSizes.begin(), Len / 8);
-    }
-    for (unsigned LMUL = 1; LMUL <= ST->getMaxLMULForFixedLengthVectors();
-         LMUL *= 2) {
-      unsigned Len = RealMinVLen * LMUL;
-      if (Len > ST->getXLen())
-        Options.LoadSizes.insert(Options.LoadSizes.begin(), Len / 8);
-    }
+    unsigned VLenB = ST->getRealMinVLen() / 8;
+    // The minimum size should be `XLen / 8 + 1`, and the maximum size should be
+    // `VLenB * MaxLMUL` so that it fits in a single register group.
+    unsigned MinSize = ST->getXLen() / 8 + 1;
+    unsigned MaxSize = VLenB * ST->getMaxLMULForFixedLengthVectors();
+    for (unsigned Size = MinSize; Size <= MaxSize; Size++)
+      Options.LoadSizes.insert(Options.LoadSizes.begin(), Size);
   }
   return Options;
 }
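
As a worked illustration of the new policy (assumed configuration, not
stated in the patch: RV64, VLEN=128, maximum LMUL for fixed-length
vectors of 8), the expansion now offers every load size from 9 to 128
bytes, rather than only the power-of-two LMUL multiples of VLEN:

    // Hedged sketch mirroring the loop above under assumed parameters;
    // none of these values come from the patch itself.
    #include <vector>

    std::vector<unsigned> exampleLoadSizes() {
      unsigned XLen = 64;                 // RV64
      unsigned VLenB = 128 / 8;           // VLEN = 128 bits -> 16 bytes
      unsigned MaxLMUL = 8;               // getMaxLMULForFixedLengthVectors()
      unsigned MinSize = XLen / 8 + 1;    // 9: sizes up to XLen/8 stay scalar
      unsigned MaxSize = VLenB * MaxLMUL; // 128: fits a single register group
      std::vector<unsigned> LoadSizes = {8, 4, 2, 1}; // scalar load sizes
      for (unsigned Size = MinSize; Size <= MaxSize; Size++)
        LoadSizes.insert(LoadSizes.begin(), Size);
      return LoadSizes; // {128, 127, ..., 10, 9, 8, 4, 2, 1}
    }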

diff --git a/llvm/test/CodeGen/RISCV/icmp-non-byte-sized.ll b/llvm/test/CodeGen/RISCV/icmp-non-byte-sized.ll
new file mode 100644
index 0000000000000..fca6238548aab
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/icmp-non-byte-sized.ll
@@ -0,0 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv32 -mattr=+v -O2 < %s | FileCheck %s --check-prefix=CHECK-RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -O2 < %s | FileCheck %s --check-prefix=CHECK-RV64
+
+define i1 @icmp_non_byte_type(ptr %p1, ptr %p2) nounwind {
+; CHECK-RV32-LABEL: icmp_non_byte_type:
+; CHECK-RV32:       # %bb.0:
+; CHECK-RV32-NEXT:    lw a2, 0(a0)
+; CHECK-RV32-NEXT:    lw a3, 4(a0)
+; CHECK-RV32-NEXT:    lw a4, 8(a0)
+; CHECK-RV32-NEXT:    lw a0, 12(a0)
+; CHECK-RV32-NEXT:    lw a5, 12(a1)
+; CHECK-RV32-NEXT:    lw a6, 4(a1)
+; CHECK-RV32-NEXT:    lw a7, 8(a1)
+; CHECK-RV32-NEXT:    lw a1, 0(a1)
+; CHECK-RV32-NEXT:    xor a0, a0, a5
+; CHECK-RV32-NEXT:    xor a3, a3, a6
+; CHECK-RV32-NEXT:    xor a4, a4, a7
+; CHECK-RV32-NEXT:    xor a1, a2, a1
+; CHECK-RV32-NEXT:    or a0, a3, a0
+; CHECK-RV32-NEXT:    or a1, a1, a4
+; CHECK-RV32-NEXT:    or a0, a1, a0
+; CHECK-RV32-NEXT:    seqz a0, a0
+; CHECK-RV32-NEXT:    ret
+;
+; CHECK-RV64-LABEL: icmp_non_byte_type:
+; CHECK-RV64:       # %bb.0:
+; CHECK-RV64-NEXT:    ld a2, 0(a0)
+; CHECK-RV64-NEXT:    ld a0, 8(a0)
+; CHECK-RV64-NEXT:    ld a3, 8(a1)
+; CHECK-RV64-NEXT:    ld a1, 0(a1)
+; CHECK-RV64-NEXT:    xor a0, a0, a3
+; CHECK-RV64-NEXT:    xor a1, a2, a1
+; CHECK-RV64-NEXT:    or a0, a1, a0
+; CHECK-RV64-NEXT:    seqz a0, a0
+; CHECK-RV64-NEXT:    ret
+  %v1 = load i127, ptr %p1
+  %v2 = load i127, ptr %p2
+  %ret = icmp eq i127 %v1, %v2
+  ret i1 %ret
+}

diff --git a/llvm/test/CodeGen/RISCV/memcmp-optsize.ll b/llvm/test/CodeGen/RISCV/memcmp-optsize.ll
index 3742383675b96..0d57e4201512e 100644
--- a/llvm/test/CodeGen/RISCV/memcmp-optsize.ll
+++ b/llvm/test/CodeGen/RISCV/memcmp-optsize.ll
@@ -517,17 +517,99 @@ define i32 @bcmp_size_5(ptr %s1, ptr %s2) nounwind optsize {
 ; CHECK-ALIGNED-RV64-V-NEXT:    addi sp, sp, 16
 ; CHECK-ALIGNED-RV64-V-NEXT:    ret
 ;
-; CHECK-UNALIGNED-LABEL: bcmp_size_5:
-; CHECK-UNALIGNED:       # %bb.0: # %entry
-; CHECK-UNALIGNED-NEXT:    lw a2, 0(a0)
-; CHECK-UNALIGNED-NEXT:    lbu a0, 4(a0)
-; CHECK-UNALIGNED-NEXT:    lw a3, 0(a1)
-; CHECK-UNALIGNED-NEXT:    lbu a1, 4(a1)
-; CHECK-UNALIGNED-NEXT:    xor a2, a2, a3
-; CHECK-UNALIGNED-NEXT:    xor a0, a0, a1
-; CHECK-UNALIGNED-NEXT:    or a0, a2, a0
-; CHECK-UNALIGNED-NEXT:    snez a0, a0
-; CHECK-UNALIGNED-NEXT:    ret
+; CHECK-UNALIGNED-RV32-LABEL: bcmp_size_5:
+; CHECK-UNALIGNED-RV32:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV32-NEXT:    lbu a0, 4(a0)
+; CHECK-UNALIGNED-RV32-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV32-NEXT:    lbu a1, 4(a1)
+; CHECK-UNALIGNED-RV32-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV32-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV32-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV32-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV32-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-LABEL: bcmp_size_5:
+; CHECK-UNALIGNED-RV64:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-NEXT:    lbu a0, 4(a0)
+; CHECK-UNALIGNED-RV64-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-NEXT:    lbu a1, 4(a1)
+; CHECK-UNALIGNED-RV64-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV64-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV32-ZBB-LABEL: bcmp_size_5:
+; CHECK-UNALIGNED-RV32-ZBB:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lbu a0, 4(a0)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lbu a1, 4(a1)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-ZBB-LABEL: bcmp_size_5:
+; CHECK-UNALIGNED-RV64-ZBB:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lbu a0, 4(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lbu a1, 4(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV32-ZBKB-LABEL: bcmp_size_5:
+; CHECK-UNALIGNED-RV32-ZBKB:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lbu a0, 4(a0)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lbu a1, 4(a1)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-ZBKB-LABEL: bcmp_size_5:
+; CHECK-UNALIGNED-RV64-ZBKB:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lbu a0, 4(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lbu a1, 4(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV32-V-LABEL: bcmp_size_5:
+; CHECK-UNALIGNED-RV32-V:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-V-NEXT:    vsetivli zero, 5, e8, mf2, ta, ma
+; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v8, (a0)
+; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v9, (a1)
+; CHECK-UNALIGNED-RV32-V-NEXT:    vmsne.vv v8, v8, v9
+; CHECK-UNALIGNED-RV32-V-NEXT:    vcpop.m a0, v8
+; CHECK-UNALIGNED-RV32-V-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV32-V-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-V-LABEL: bcmp_size_5:
+; CHECK-UNALIGNED-RV64-V:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-V-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-V-NEXT:    lbu a0, 4(a0)
+; CHECK-UNALIGNED-RV64-V-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-V-NEXT:    lbu a1, 4(a1)
+; CHECK-UNALIGNED-RV64-V-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-V-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-V-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-V-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV64-V-NEXT:    ret
 entry:
   %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iXLen 5)
   ret i32 %bcmp
@@ -614,17 +696,99 @@ define i32 @bcmp_size_6(ptr %s1, ptr %s2) nounwind optsize {
 ; CHECK-ALIGNED-RV64-V-NEXT:    addi sp, sp, 16
 ; CHECK-ALIGNED-RV64-V-NEXT:    ret
 ;
-; CHECK-UNALIGNED-LABEL: bcmp_size_6:
-; CHECK-UNALIGNED:       # %bb.0: # %entry
-; CHECK-UNALIGNED-NEXT:    lw a2, 0(a0)
-; CHECK-UNALIGNED-NEXT:    lhu a0, 4(a0)
-; CHECK-UNALIGNED-NEXT:    lw a3, 0(a1)
-; CHECK-UNALIGNED-NEXT:    lhu a1, 4(a1)
-; CHECK-UNALIGNED-NEXT:    xor a2, a2, a3
-; CHECK-UNALIGNED-NEXT:    xor a0, a0, a1
-; CHECK-UNALIGNED-NEXT:    or a0, a2, a0
-; CHECK-UNALIGNED-NEXT:    snez a0, a0
-; CHECK-UNALIGNED-NEXT:    ret
+; CHECK-UNALIGNED-RV32-LABEL: bcmp_size_6:
+; CHECK-UNALIGNED-RV32:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV32-NEXT:    lhu a0, 4(a0)
+; CHECK-UNALIGNED-RV32-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV32-NEXT:    lhu a1, 4(a1)
+; CHECK-UNALIGNED-RV32-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV32-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV32-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV32-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV32-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-LABEL: bcmp_size_6:
+; CHECK-UNALIGNED-RV64:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-NEXT:    lhu a0, 4(a0)
+; CHECK-UNALIGNED-RV64-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-NEXT:    lhu a1, 4(a1)
+; CHECK-UNALIGNED-RV64-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV64-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV32-ZBB-LABEL: bcmp_size_6:
+; CHECK-UNALIGNED-RV32-ZBB:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lhu a0, 4(a0)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lhu a1, 4(a1)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-ZBB-LABEL: bcmp_size_6:
+; CHECK-UNALIGNED-RV64-ZBB:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lhu a0, 4(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lhu a1, 4(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV32-ZBKB-LABEL: bcmp_size_6:
+; CHECK-UNALIGNED-RV32-ZBKB:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lhu a0, 4(a0)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lhu a1, 4(a1)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-ZBKB-LABEL: bcmp_size_6:
+; CHECK-UNALIGNED-RV64-ZBKB:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lhu a0, 4(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lhu a1, 4(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV32-V-LABEL: bcmp_size_6:
+; CHECK-UNALIGNED-RV32-V:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-V-NEXT:    vsetivli zero, 6, e8, mf2, ta, ma
+; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v8, (a0)
+; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v9, (a1)
+; CHECK-UNALIGNED-RV32-V-NEXT:    vmsne.vv v8, v8, v9
+; CHECK-UNALIGNED-RV32-V-NEXT:    vcpop.m a0, v8
+; CHECK-UNALIGNED-RV32-V-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV32-V-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-V-LABEL: bcmp_size_6:
+; CHECK-UNALIGNED-RV64-V:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-V-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-V-NEXT:    lhu a0, 4(a0)
+; CHECK-UNALIGNED-RV64-V-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-V-NEXT:    lhu a1, 4(a1)
+; CHECK-UNALIGNED-RV64-V-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-V-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-V-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-V-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV64-V-NEXT:    ret
 entry:
   %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iXLen 6)
   ret i32 %bcmp
@@ -711,17 +875,99 @@ define i32 @bcmp_size_7(ptr %s1, ptr %s2) nounwind optsize {
 ; CHECK-ALIGNED-RV64-V-NEXT:    addi sp, sp, 16
 ; CHECK-ALIGNED-RV64-V-NEXT:    ret
 ;
-; CHECK-UNALIGNED-LABEL: bcmp_size_7:
-; CHECK-UNALIGNED:       # %bb.0: # %entry
-; CHECK-UNALIGNED-NEXT:    lw a2, 0(a0)
-; CHECK-UNALIGNED-NEXT:    lw a0, 3(a0)
-; CHECK-UNALIGNED-NEXT:    lw a3, 0(a1)
-; CHECK-UNALIGNED-NEXT:    lw a1, 3(a1)
-; CHECK-UNALIGNED-NEXT:    xor a2, a2, a3
-; CHECK-UNALIGNED-NEXT:    xor a0, a0, a1
-; CHECK-UNALIGNED-NEXT:    or a0, a2, a0
-; CHECK-UNALIGNED-NEXT:    snez a0, a0
-; CHECK-UNALIGNED-NEXT:    ret
+; CHECK-UNALIGNED-RV32-LABEL: bcmp_size_7:
+; CHECK-UNALIGNED-RV32:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV32-NEXT:    lw a0, 3(a0)
+; CHECK-UNALIGNED-RV32-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV32-NEXT:    lw a1, 3(a1)
+; CHECK-UNALIGNED-RV32-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV32-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV32-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV32-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV32-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-LABEL: bcmp_size_7:
+; CHECK-UNALIGNED-RV64:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-NEXT:    lw a0, 3(a0)
+; CHECK-UNALIGNED-RV64-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-NEXT:    lw a1, 3(a1)
+; CHECK-UNALIGNED-RV64-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV64-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV32-ZBB-LABEL: bcmp_size_7:
+; CHECK-UNALIGNED-RV32-ZBB:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lw a0, 3(a0)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lw a1, 3(a1)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-ZBB-LABEL: bcmp_size_7:
+; CHECK-UNALIGNED-RV64-ZBB:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lw a0, 3(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lw a1, 3(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV32-ZBKB-LABEL: bcmp_size_7:
+; CHECK-UNALIGNED-RV32-ZBKB:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lw a0, 3(a0)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lw a1, 3(a1)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-ZBKB-LABEL: bcmp_size_7:
+; CHECK-UNALIGNED-RV64-ZBKB:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lw a0, 3(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lw a1, 3(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV32-V-LABEL: bcmp_size_7:
+; CHECK-UNALIGNED-RV32-V:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-V-NEXT:    vsetivli zero, 7, e8, mf2, ta, ma
+; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v8, (a0)
+; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v9, (a1)
+; CHECK-UNALIGNED-RV32-V-NEXT:    vmsne.vv v8, v8, v9
+; CHECK-UNALIGNED-RV32-V-NEXT:    vcpop.m a0, v8
+; CHECK-UNALIGNED-RV32-V-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV32-V-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-V-LABEL: bcmp_size_7:
+; CHECK-UNALIGNED-RV64-V:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-V-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-V-NEXT:    lw a0, 3(a0)
+; CHECK-UNALIGNED-RV64-V-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-V-NEXT:    lw a1, 3(a1)
+; CHECK-UNALIGNED-RV64-V-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-V-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-V-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-V-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV64-V-NEXT:    ret
 entry:
   %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iXLen 7)
   ret i32 %bcmp
@@ -1069,33 +1315,21 @@ define i32 @bcmp_size_15(ptr %s1, ptr %s2) nounwind optsize {
 ;
 ; CHECK-UNALIGNED-RV32-V-LABEL: bcmp_size_15:
 ; CHECK-UNALIGNED-RV32-V:       # %bb.0: # %entry
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a2, 0(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a3, 4(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a4, 7(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a0, 11(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a5, 0(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a6, 4(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a7, 7(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a1, 11(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a2, a2, a5
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a3, a3, a6
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a4, a4, a7
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a0, a0, a1
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a0, a3, a0
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a2, a2, a4
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV32-V-NEXT:    vsetivli zero, 15, e8, m1, ta, ma
+; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v8, (a0)
+; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v9, (a1)
+; CHECK-UNALIGNED-RV32-V-NEXT:    vmsne.vv v8, v8, v9
+; CHECK-UNALIGNED-RV32-V-NEXT:    vcpop.m a0, v8
 ; CHECK-UNALIGNED-RV32-V-NEXT:    snez a0, a0
 ; CHECK-UNALIGNED-RV32-V-NEXT:    ret
 ;
 ; CHECK-UNALIGNED-RV64-V-LABEL: bcmp_size_15:
 ; CHECK-UNALIGNED-RV64-V:       # %bb.0: # %entry
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a2, 0(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a0, 7(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a3, 0(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a1, 7(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor a2, a2, a3
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor a0, a0, a1
-; CHECK-UNALIGNED-RV64-V-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-V-NEXT:    vsetivli zero, 15, e8, m1, ta, ma
+; CHECK-UNALIGNED-RV64-V-NEXT:    vle8.v v8, (a0)
+; CHECK-UNALIGNED-RV64-V-NEXT:    vle8.v v9, (a1)
+; CHECK-UNALIGNED-RV64-V-NEXT:    vmsne.vv v8, v8, v9
+; CHECK-UNALIGNED-RV64-V-NEXT:    vcpop.m a0, v8
 ; CHECK-UNALIGNED-RV64-V-NEXT:    snez a0, a0
 ; CHECK-UNALIGNED-RV64-V-NEXT:    ret
 entry:
@@ -1477,57 +1711,21 @@ define i32 @bcmp_size_31(ptr %s1, ptr %s2) nounwind optsize {
 ;
 ; CHECK-UNALIGNED-RV32-V-LABEL: bcmp_size_31:
 ; CHECK-UNALIGNED-RV32-V:       # %bb.0: # %entry
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a2, 0(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a3, 4(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a4, 8(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a5, 12(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a6, 0(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a7, 4(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t0, 8(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t1, 12(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t2, 15(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t3, 19(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t4, 23(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a0, 27(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a3, a3, a7
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a5, a5, t1
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a7, 15(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t1, 19(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t5, 23(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a1, 27(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a2, a2, a6
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a4, a4, t0
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a6, t3, t1
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a0, a0, a1
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a1, t2, a7
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a7, t4, t5
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a4, a4, a7
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a1, a2, a1
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a0, a5, a0
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a2, a3, a6
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a0, a2, a0
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a1, a1, a4
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a0, a1, a0
+; CHECK-UNALIGNED-RV32-V-NEXT:    vsetivli zero, 31, e8, m2, ta, ma
+; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v8, (a0)
+; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v10, (a1)
+; CHECK-UNALIGNED-RV32-V-NEXT:    vmsne.vv v12, v8, v10
+; CHECK-UNALIGNED-RV32-V-NEXT:    vcpop.m a0, v12
 ; CHECK-UNALIGNED-RV32-V-NEXT:    snez a0, a0
 ; CHECK-UNALIGNED-RV32-V-NEXT:    ret
 ;
 ; CHECK-UNALIGNED-RV64-V-LABEL: bcmp_size_31:
 ; CHECK-UNALIGNED-RV64-V:       # %bb.0: # %entry
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a2, 0(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a3, 8(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a4, 15(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a0, 23(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a5, 0(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a6, 8(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a7, 15(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a1, 23(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor a2, a2, a5
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor a3, a3, a6
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor a4, a4, a7
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor a0, a0, a1
-; CHECK-UNALIGNED-RV64-V-NEXT:    or a0, a3, a0
-; CHECK-UNALIGNED-RV64-V-NEXT:    or a2, a2, a4
-; CHECK-UNALIGNED-RV64-V-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-V-NEXT:    vsetivli zero, 31, e8, m2, ta, ma
+; CHECK-UNALIGNED-RV64-V-NEXT:    vle8.v v8, (a0)
+; CHECK-UNALIGNED-RV64-V-NEXT:    vle8.v v10, (a1)
+; CHECK-UNALIGNED-RV64-V-NEXT:    vmsne.vv v12, v8, v10
+; CHECK-UNALIGNED-RV64-V-NEXT:    vcpop.m a0, v12
 ; CHECK-UNALIGNED-RV64-V-NEXT:    snez a0, a0
 ; CHECK-UNALIGNED-RV64-V-NEXT:    ret
 entry:
@@ -1875,129 +2073,23 @@ define i32 @bcmp_size_63(ptr %s1, ptr %s2) nounwind optsize {
 ;
 ; CHECK-UNALIGNED-RV32-V-LABEL: bcmp_size_63:
 ; CHECK-UNALIGNED-RV32-V:       # %bb.0: # %entry
-; CHECK-UNALIGNED-RV32-V-NEXT:    addi sp, sp, -48
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw s0, 44(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw s1, 40(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw s2, 36(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw s3, 32(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw s4, 28(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw s5, 24(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw s6, 20(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw s7, 16(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw s8, 12(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw s9, 8(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw s10, 4(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a2, 16(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a3, 20(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a4, 24(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a5, 28(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a6, 0(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a7, 4(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t0, 8(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t1, 12(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t2, 16(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t3, 20(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t4, 24(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t5, 28(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t6, 0(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s0, 4(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s1, 8(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s2, 12(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s3, 47(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s4, 51(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s5, 55(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s6, 59(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s7, 31(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s8, 35(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s9, 39(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a0, 43(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor t1, t1, s2
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a5, a5, t5
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a7, a7, s0
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t5, 31(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s0, 35(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s2, 39(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s10, 43(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a3, a3, t3
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor t0, t0, s1
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a4, a4, t4
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t3, 47(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t4, 51(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s1, 55(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a1, 59(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a6, a6, t6
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a2, a2, t2
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a0, a0, s10
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a1, s6, a1
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor t2, s8, s0
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor t4, s4, t4
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor t6, s9, s2
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor s0, s5, s1
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor t5, s7, t5
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor t3, s3, t3
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a2, a2, t3
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a6, a6, t5
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a4, a4, s0
-; CHECK-UNALIGNED-RV32-V-NEXT:    or t0, t0, t6
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a3, a3, t4
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a7, a7, t2
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a1, a5, a1
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a0, t1, a0
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a0, a0, a1
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a1, a7, a3
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a3, t0, a4
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a2, a6, a2
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a0, a1, a0
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a2, a2, a3
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV32-V-NEXT:    li a2, 63
+; CHECK-UNALIGNED-RV32-V-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
+; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v8, (a0)
+; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v12, (a1)
+; CHECK-UNALIGNED-RV32-V-NEXT:    vmsne.vv v16, v8, v12
+; CHECK-UNALIGNED-RV32-V-NEXT:    vcpop.m a0, v16
 ; CHECK-UNALIGNED-RV32-V-NEXT:    snez a0, a0
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s0, 44(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s1, 40(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s2, 36(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s3, 32(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s4, 28(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s5, 24(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s6, 20(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s7, 16(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s8, 12(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s9, 8(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s10, 4(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    addi sp, sp, 48
 ; CHECK-UNALIGNED-RV32-V-NEXT:    ret
 ;
 ; CHECK-UNALIGNED-RV64-V-LABEL: bcmp_size_63:
 ; CHECK-UNALIGNED-RV64-V:       # %bb.0: # %entry
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a2, 0(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a3, 8(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a4, 16(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a5, 24(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a6, 0(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a7, 8(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld t0, 16(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld t1, 24(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld t2, 31(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld t3, 39(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld t4, 47(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a0, 55(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor a3, a3, a7
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor a5, a5, t1
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a7, 31(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld t1, 39(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld t5, 47(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a1, 55(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor a2, a2, a6
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor a4, a4, t0
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor a6, t3, t1
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor a0, a0, a1
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor a1, t2, a7
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor a7, t4, t5
-; CHECK-UNALIGNED-RV64-V-NEXT:    or a4, a4, a7
-; CHECK-UNALIGNED-RV64-V-NEXT:    or a1, a2, a1
-; CHECK-UNALIGNED-RV64-V-NEXT:    or a0, a5, a0
-; CHECK-UNALIGNED-RV64-V-NEXT:    or a2, a3, a6
-; CHECK-UNALIGNED-RV64-V-NEXT:    or a0, a2, a0
-; CHECK-UNALIGNED-RV64-V-NEXT:    or a1, a1, a4
-; CHECK-UNALIGNED-RV64-V-NEXT:    or a0, a1, a0
+; CHECK-UNALIGNED-RV64-V-NEXT:    li a2, 63
+; CHECK-UNALIGNED-RV64-V-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
+; CHECK-UNALIGNED-RV64-V-NEXT:    vle8.v v8, (a0)
+; CHECK-UNALIGNED-RV64-V-NEXT:    vle8.v v12, (a1)
+; CHECK-UNALIGNED-RV64-V-NEXT:    vmsne.vv v16, v8, v12
+; CHECK-UNALIGNED-RV64-V-NEXT:    vcpop.m a0, v16
 ; CHECK-UNALIGNED-RV64-V-NEXT:    snez a0, a0
 ; CHECK-UNALIGNED-RV64-V-NEXT:    ret
 entry:
@@ -2315,270 +2407,24 @@ define i32 @bcmp_size_127(ptr %s1, ptr %s2) nounwind optsize {
 ;
 ; CHECK-UNALIGNED-RV32-V-LABEL: bcmp_size_127:
 ; CHECK-UNALIGNED-RV32-V:       # %bb.0: # %entry
-; CHECK-UNALIGNED-RV32-V-NEXT:    addi sp, sp, -96
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw ra, 92(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw s0, 88(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw s1, 84(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw s2, 80(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw s3, 76(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw s4, 72(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw s5, 68(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw s6, 64(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw s7, 60(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw s8, 56(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw s9, 52(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw s10, 48(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw s11, 44(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a5, 32(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t5, 36(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t4, 40(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a6, 44(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t2, 0(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t1, 4(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s0, 8(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a4, 12(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t3, 48(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t0, 52(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s2, 56(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a3, 60(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s1, 16(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a7, 20(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t6, 24(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a2, 28(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s4, 12(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s5, 60(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s6, 16(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s7, 20(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s8, 24(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s9, 28(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s3, 32(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s10, 36(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s11, 40(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw ra, 44(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a2, a2, s9
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw a2, 40(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a2, a3, s5
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw a2, 36(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a2, a4, s4
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw a2, 32(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s4, 56(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s5, 48(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s9, 52(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a2, a6, ra
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw a2, 28(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw ra, 4(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a2, a7, s7
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw a2, 24(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a2, t0, s9
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw a2, 20(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s7, 0(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s9, 8(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a2, t1, ra
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw a2, 16(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw ra, 107(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a2, t5, s10
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw a2, 12(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s10, 75(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor t6, t6, s8
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s8, 123(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor s2, s2, s4
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor s0, s0, s9
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor t5, t4, s11
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s4, 83(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s9, 87(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s11, 91(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor s1, s1, s6
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s6, 107(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor t4, t3, s5
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s5, 91(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor t3, t2, s7
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s7, 123(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor t2, a5, s3
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s3, 75(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor s5, s11, s5
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor s7, s8, s7
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s8, 87(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s11, 83(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor s3, s10, s3
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s10, 115(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor s6, ra, s6
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw ra, 115(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor s4, s4, s11
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s11, 119(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a7, 119(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor s10, s10, ra
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw ra, 71(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a5, 67(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a2, 67(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a6, 71(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a4, 99(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a3, 99(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor t1, a5, a2
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a5, 103(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a2, 103(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor t0, a4, a3
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a4, s9, s8
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a7, s11, a7
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a6, ra, a6
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a3, a5, a2
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a5, 95(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s8, 63(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s9, 111(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a2, 79(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s11, 79(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw ra, 111(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a0, 63(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a1, 95(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a2, a2, s11
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor s9, s9, ra
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a0, s8, a0
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a1, a5, a1
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a1, t2, a1
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a0, t3, a0
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a5, t4, s9
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a2, s1, a2
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a3, t5, a3
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a6, s0, a6
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a7, s2, a7
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a4, t6, a4
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t2, 12(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    or t0, t2, t0
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t2, 16(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    or t1, t2, t1
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t2, 20(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    or t2, t2, s10
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t3, 24(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    or t3, t3, s4
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t4, 28(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    or t4, t4, s6
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t5, 32(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    or t5, t5, s3
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t6, 36(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    or t6, t6, s7
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s0, 40(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    or s0, s0, s5
-; CHECK-UNALIGNED-RV32-V-NEXT:    or t6, s0, t6
-; CHECK-UNALIGNED-RV32-V-NEXT:    or t4, t5, t4
-; CHECK-UNALIGNED-RV32-V-NEXT:    or t2, t3, t2
-; CHECK-UNALIGNED-RV32-V-NEXT:    or t0, t1, t0
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a4, a4, a7
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a3, a6, a3
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a2, a2, a5
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a0, a0, a1
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a1, t4, t6
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a5, t0, t2
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a3, a3, a4
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a0, a0, a2
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a1, a5, a1
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a0, a0, a3
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a0, a0, a1
+; CHECK-UNALIGNED-RV32-V-NEXT:    li a2, 127
+; CHECK-UNALIGNED-RV32-V-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v8, (a0)
+; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v16, (a1)
+; CHECK-UNALIGNED-RV32-V-NEXT:    vmsne.vv v24, v8, v16
+; CHECK-UNALIGNED-RV32-V-NEXT:    vcpop.m a0, v24
 ; CHECK-UNALIGNED-RV32-V-NEXT:    snez a0, a0
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw ra, 92(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s0, 88(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s1, 84(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s2, 80(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s3, 76(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s4, 72(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s5, 68(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s6, 64(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s7, 60(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s8, 56(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s9, 52(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s10, 48(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s11, 44(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    addi sp, sp, 96
 ; CHECK-UNALIGNED-RV32-V-NEXT:    ret
 ;
 ; CHECK-UNALIGNED-RV64-V-LABEL: bcmp_size_127:
 ; CHECK-UNALIGNED-RV64-V:       # %bb.0: # %entry
-; CHECK-UNALIGNED-RV64-V-NEXT:    addi sp, sp, -96
-; CHECK-UNALIGNED-RV64-V-NEXT:    sd s0, 88(sp) # 8-byte Folded Spill
-; CHECK-UNALIGNED-RV64-V-NEXT:    sd s1, 80(sp) # 8-byte Folded Spill
-; CHECK-UNALIGNED-RV64-V-NEXT:    sd s2, 72(sp) # 8-byte Folded Spill
-; CHECK-UNALIGNED-RV64-V-NEXT:    sd s3, 64(sp) # 8-byte Folded Spill
-; CHECK-UNALIGNED-RV64-V-NEXT:    sd s4, 56(sp) # 8-byte Folded Spill
-; CHECK-UNALIGNED-RV64-V-NEXT:    sd s5, 48(sp) # 8-byte Folded Spill
-; CHECK-UNALIGNED-RV64-V-NEXT:    sd s6, 40(sp) # 8-byte Folded Spill
-; CHECK-UNALIGNED-RV64-V-NEXT:    sd s7, 32(sp) # 8-byte Folded Spill
-; CHECK-UNALIGNED-RV64-V-NEXT:    sd s8, 24(sp) # 8-byte Folded Spill
-; CHECK-UNALIGNED-RV64-V-NEXT:    sd s9, 16(sp) # 8-byte Folded Spill
-; CHECK-UNALIGNED-RV64-V-NEXT:    sd s10, 8(sp) # 8-byte Folded Spill
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a2, 32(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a3, 40(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a4, 48(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a5, 56(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a6, 0(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a7, 8(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld t0, 16(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld t1, 24(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld t2, 32(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld t3, 40(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld t4, 48(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld t5, 56(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld t6, 0(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld s0, 8(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld s1, 16(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld s2, 24(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld s3, 95(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld s4, 103(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld s5, 111(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld s6, 119(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld s7, 63(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld s8, 71(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld s9, 79(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a0, 87(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor t1, t1, s2
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor a5, a5, t5
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor a7, a7, s0
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld t5, 63(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld s0, 71(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld s2, 79(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld s10, 87(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor a3, a3, t3
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor t0, t0, s1
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor a4, a4, t4
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld t3, 95(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld t4, 103(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld s1, 111(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a1, 119(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor a6, a6, t6
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor a2, a2, t2
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor a0, a0, s10
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor a1, s6, a1
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor t2, s8, s0
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor t4, s4, t4
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor t6, s9, s2
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor s0, s5, s1
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor t5, s7, t5
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor t3, s3, t3
-; CHECK-UNALIGNED-RV64-V-NEXT:    or a2, a2, t3
-; CHECK-UNALIGNED-RV64-V-NEXT:    or a6, a6, t5
-; CHECK-UNALIGNED-RV64-V-NEXT:    or a4, a4, s0
-; CHECK-UNALIGNED-RV64-V-NEXT:    or t0, t0, t6
-; CHECK-UNALIGNED-RV64-V-NEXT:    or a3, a3, t4
-; CHECK-UNALIGNED-RV64-V-NEXT:    or a7, a7, t2
-; CHECK-UNALIGNED-RV64-V-NEXT:    or a1, a5, a1
-; CHECK-UNALIGNED-RV64-V-NEXT:    or a0, t1, a0
-; CHECK-UNALIGNED-RV64-V-NEXT:    or a0, a0, a1
-; CHECK-UNALIGNED-RV64-V-NEXT:    or a1, a7, a3
-; CHECK-UNALIGNED-RV64-V-NEXT:    or a3, t0, a4
-; CHECK-UNALIGNED-RV64-V-NEXT:    or a2, a6, a2
-; CHECK-UNALIGNED-RV64-V-NEXT:    or a0, a1, a0
-; CHECK-UNALIGNED-RV64-V-NEXT:    or a2, a2, a3
-; CHECK-UNALIGNED-RV64-V-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-V-NEXT:    li a2, 127
+; CHECK-UNALIGNED-RV64-V-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-UNALIGNED-RV64-V-NEXT:    vle8.v v8, (a0)
+; CHECK-UNALIGNED-RV64-V-NEXT:    vle8.v v16, (a1)
+; CHECK-UNALIGNED-RV64-V-NEXT:    vmsne.vv v24, v8, v16
+; CHECK-UNALIGNED-RV64-V-NEXT:    vcpop.m a0, v24
 ; CHECK-UNALIGNED-RV64-V-NEXT:    snez a0, a0
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld s0, 88(sp) # 8-byte Folded Reload
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld s1, 80(sp) # 8-byte Folded Reload
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld s2, 72(sp) # 8-byte Folded Reload
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld s3, 64(sp) # 8-byte Folded Reload
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld s4, 56(sp) # 8-byte Folded Reload
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld s5, 48(sp) # 8-byte Folded Reload
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld s6, 40(sp) # 8-byte Folded Reload
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld s7, 32(sp) # 8-byte Folded Reload
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld s8, 24(sp) # 8-byte Folded Reload
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld s9, 16(sp) # 8-byte Folded Reload
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld s10, 8(sp) # 8-byte Folded Reload
-; CHECK-UNALIGNED-RV64-V-NEXT:    addi sp, sp, 96
 ; CHECK-UNALIGNED-RV64-V-NEXT:    ret
 entry:
   %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iXLen 127)

diff --git a/llvm/test/CodeGen/RISCV/memcmp.ll b/llvm/test/CodeGen/RISCV/memcmp.ll
index f9a6dbba04fc6..0caab1f5ce2f0 100644
--- a/llvm/test/CodeGen/RISCV/memcmp.ll
+++ b/llvm/test/CodeGen/RISCV/memcmp.ll
@@ -517,17 +517,99 @@ define i32 @bcmp_size_5(ptr %s1, ptr %s2) nounwind {
 ; CHECK-ALIGNED-RV64-V-NEXT:    addi sp, sp, 16
 ; CHECK-ALIGNED-RV64-V-NEXT:    ret
 ;
-; CHECK-UNALIGNED-LABEL: bcmp_size_5:
-; CHECK-UNALIGNED:       # %bb.0: # %entry
-; CHECK-UNALIGNED-NEXT:    lw a2, 0(a0)
-; CHECK-UNALIGNED-NEXT:    lbu a0, 4(a0)
-; CHECK-UNALIGNED-NEXT:    lw a3, 0(a1)
-; CHECK-UNALIGNED-NEXT:    lbu a1, 4(a1)
-; CHECK-UNALIGNED-NEXT:    xor a2, a2, a3
-; CHECK-UNALIGNED-NEXT:    xor a0, a0, a1
-; CHECK-UNALIGNED-NEXT:    or a0, a2, a0
-; CHECK-UNALIGNED-NEXT:    snez a0, a0
-; CHECK-UNALIGNED-NEXT:    ret
+; CHECK-UNALIGNED-RV32-LABEL: bcmp_size_5:
+; CHECK-UNALIGNED-RV32:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV32-NEXT:    lbu a0, 4(a0)
+; CHECK-UNALIGNED-RV32-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV32-NEXT:    lbu a1, 4(a1)
+; CHECK-UNALIGNED-RV32-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV32-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV32-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV32-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV32-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-LABEL: bcmp_size_5:
+; CHECK-UNALIGNED-RV64:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-NEXT:    lbu a0, 4(a0)
+; CHECK-UNALIGNED-RV64-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-NEXT:    lbu a1, 4(a1)
+; CHECK-UNALIGNED-RV64-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV64-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV32-ZBB-LABEL: bcmp_size_5:
+; CHECK-UNALIGNED-RV32-ZBB:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lbu a0, 4(a0)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lbu a1, 4(a1)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-ZBB-LABEL: bcmp_size_5:
+; CHECK-UNALIGNED-RV64-ZBB:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lbu a0, 4(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lbu a1, 4(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV32-ZBKB-LABEL: bcmp_size_5:
+; CHECK-UNALIGNED-RV32-ZBKB:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lbu a0, 4(a0)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lbu a1, 4(a1)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-ZBKB-LABEL: bcmp_size_5:
+; CHECK-UNALIGNED-RV64-ZBKB:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lbu a0, 4(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lbu a1, 4(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV32-V-LABEL: bcmp_size_5:
+; CHECK-UNALIGNED-RV32-V:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-V-NEXT:    vsetivli zero, 5, e8, mf2, ta, ma
+; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v8, (a0)
+; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v9, (a1)
+; CHECK-UNALIGNED-RV32-V-NEXT:    vmsne.vv v8, v8, v9
+; CHECK-UNALIGNED-RV32-V-NEXT:    vcpop.m a0, v8
+; CHECK-UNALIGNED-RV32-V-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV32-V-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-V-LABEL: bcmp_size_5:
+; CHECK-UNALIGNED-RV64-V:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-V-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-V-NEXT:    lbu a0, 4(a0)
+; CHECK-UNALIGNED-RV64-V-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-V-NEXT:    lbu a1, 4(a1)
+; CHECK-UNALIGNED-RV64-V-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-V-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-V-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-V-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV64-V-NEXT:    ret
 entry:
   %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iXLen 5)
   ret i32 %bcmp
@@ -614,17 +696,99 @@ define i32 @bcmp_size_6(ptr %s1, ptr %s2) nounwind {
 ; CHECK-ALIGNED-RV64-V-NEXT:    addi sp, sp, 16
 ; CHECK-ALIGNED-RV64-V-NEXT:    ret
 ;
-; CHECK-UNALIGNED-LABEL: bcmp_size_6:
-; CHECK-UNALIGNED:       # %bb.0: # %entry
-; CHECK-UNALIGNED-NEXT:    lw a2, 0(a0)
-; CHECK-UNALIGNED-NEXT:    lhu a0, 4(a0)
-; CHECK-UNALIGNED-NEXT:    lw a3, 0(a1)
-; CHECK-UNALIGNED-NEXT:    lhu a1, 4(a1)
-; CHECK-UNALIGNED-NEXT:    xor a2, a2, a3
-; CHECK-UNALIGNED-NEXT:    xor a0, a0, a1
-; CHECK-UNALIGNED-NEXT:    or a0, a2, a0
-; CHECK-UNALIGNED-NEXT:    snez a0, a0
-; CHECK-UNALIGNED-NEXT:    ret
+; CHECK-UNALIGNED-RV32-LABEL: bcmp_size_6:
+; CHECK-UNALIGNED-RV32:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV32-NEXT:    lhu a0, 4(a0)
+; CHECK-UNALIGNED-RV32-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV32-NEXT:    lhu a1, 4(a1)
+; CHECK-UNALIGNED-RV32-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV32-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV32-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV32-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV32-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-LABEL: bcmp_size_6:
+; CHECK-UNALIGNED-RV64:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-NEXT:    lhu a0, 4(a0)
+; CHECK-UNALIGNED-RV64-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-NEXT:    lhu a1, 4(a1)
+; CHECK-UNALIGNED-RV64-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV64-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV32-ZBB-LABEL: bcmp_size_6:
+; CHECK-UNALIGNED-RV32-ZBB:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lhu a0, 4(a0)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lhu a1, 4(a1)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-ZBB-LABEL: bcmp_size_6:
+; CHECK-UNALIGNED-RV64-ZBB:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lhu a0, 4(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lhu a1, 4(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV32-ZBKB-LABEL: bcmp_size_6:
+; CHECK-UNALIGNED-RV32-ZBKB:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lhu a0, 4(a0)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lhu a1, 4(a1)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-ZBKB-LABEL: bcmp_size_6:
+; CHECK-UNALIGNED-RV64-ZBKB:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lhu a0, 4(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lhu a1, 4(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV32-V-LABEL: bcmp_size_6:
+; CHECK-UNALIGNED-RV32-V:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-V-NEXT:    vsetivli zero, 6, e8, mf2, ta, ma
+; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v8, (a0)
+; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v9, (a1)
+; CHECK-UNALIGNED-RV32-V-NEXT:    vmsne.vv v8, v8, v9
+; CHECK-UNALIGNED-RV32-V-NEXT:    vcpop.m a0, v8
+; CHECK-UNALIGNED-RV32-V-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV32-V-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-V-LABEL: bcmp_size_6:
+; CHECK-UNALIGNED-RV64-V:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-V-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-V-NEXT:    lhu a0, 4(a0)
+; CHECK-UNALIGNED-RV64-V-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-V-NEXT:    lhu a1, 4(a1)
+; CHECK-UNALIGNED-RV64-V-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-V-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-V-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-V-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV64-V-NEXT:    ret
 entry:
   %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iXLen 6)
   ret i32 %bcmp
@@ -711,17 +875,99 @@ define i32 @bcmp_size_7(ptr %s1, ptr %s2) nounwind {
 ; CHECK-ALIGNED-RV64-V-NEXT:    addi sp, sp, 16
 ; CHECK-ALIGNED-RV64-V-NEXT:    ret
 ;
-; CHECK-UNALIGNED-LABEL: bcmp_size_7:
-; CHECK-UNALIGNED:       # %bb.0: # %entry
-; CHECK-UNALIGNED-NEXT:    lw a2, 0(a0)
-; CHECK-UNALIGNED-NEXT:    lw a0, 3(a0)
-; CHECK-UNALIGNED-NEXT:    lw a3, 0(a1)
-; CHECK-UNALIGNED-NEXT:    lw a1, 3(a1)
-; CHECK-UNALIGNED-NEXT:    xor a2, a2, a3
-; CHECK-UNALIGNED-NEXT:    xor a0, a0, a1
-; CHECK-UNALIGNED-NEXT:    or a0, a2, a0
-; CHECK-UNALIGNED-NEXT:    snez a0, a0
-; CHECK-UNALIGNED-NEXT:    ret
+; CHECK-UNALIGNED-RV32-LABEL: bcmp_size_7:
+; CHECK-UNALIGNED-RV32:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV32-NEXT:    lw a0, 3(a0)
+; CHECK-UNALIGNED-RV32-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV32-NEXT:    lw a1, 3(a1)
+; CHECK-UNALIGNED-RV32-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV32-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV32-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV32-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV32-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-LABEL: bcmp_size_7:
+; CHECK-UNALIGNED-RV64:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-NEXT:    lw a0, 3(a0)
+; CHECK-UNALIGNED-RV64-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-NEXT:    lw a1, 3(a1)
+; CHECK-UNALIGNED-RV64-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV64-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV32-ZBB-LABEL: bcmp_size_7:
+; CHECK-UNALIGNED-RV32-ZBB:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lw a0, 3(a0)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lw a1, 3(a1)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-ZBB-LABEL: bcmp_size_7:
+; CHECK-UNALIGNED-RV64-ZBB:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lw a0, 3(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lw a1, 3(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV32-ZBKB-LABEL: bcmp_size_7:
+; CHECK-UNALIGNED-RV32-ZBKB:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lw a0, 3(a0)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lw a1, 3(a1)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-ZBKB-LABEL: bcmp_size_7:
+; CHECK-UNALIGNED-RV64-ZBKB:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lw a0, 3(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lw a1, 3(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV32-V-LABEL: bcmp_size_7:
+; CHECK-UNALIGNED-RV32-V:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-V-NEXT:    vsetivli zero, 7, e8, mf2, ta, ma
+; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v8, (a0)
+; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v9, (a1)
+; CHECK-UNALIGNED-RV32-V-NEXT:    vmsne.vv v8, v8, v9
+; CHECK-UNALIGNED-RV32-V-NEXT:    vcpop.m a0, v8
+; CHECK-UNALIGNED-RV32-V-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV32-V-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-V-LABEL: bcmp_size_7:
+; CHECK-UNALIGNED-RV64-V:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-V-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-V-NEXT:    lw a0, 3(a0)
+; CHECK-UNALIGNED-RV64-V-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-V-NEXT:    lw a1, 3(a1)
+; CHECK-UNALIGNED-RV64-V-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-V-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-V-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-V-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV64-V-NEXT:    ret
 entry:
   %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iXLen 7)
   ret i32 %bcmp
@@ -1069,33 +1315,21 @@ define i32 @bcmp_size_15(ptr %s1, ptr %s2) nounwind {
 ;
 ; CHECK-UNALIGNED-RV32-V-LABEL: bcmp_size_15:
 ; CHECK-UNALIGNED-RV32-V:       # %bb.0: # %entry
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a2, 0(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a3, 4(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a4, 7(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a0, 11(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a5, 0(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a6, 4(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a7, 7(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a1, 11(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a2, a2, a5
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a3, a3, a6
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a4, a4, a7
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a0, a0, a1
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a0, a3, a0
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a2, a2, a4
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV32-V-NEXT:    vsetivli zero, 15, e8, m1, ta, ma
+; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v8, (a0)
+; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v9, (a1)
+; CHECK-UNALIGNED-RV32-V-NEXT:    vmsne.vv v8, v8, v9
+; CHECK-UNALIGNED-RV32-V-NEXT:    vcpop.m a0, v8
 ; CHECK-UNALIGNED-RV32-V-NEXT:    snez a0, a0
 ; CHECK-UNALIGNED-RV32-V-NEXT:    ret
 ;
 ; CHECK-UNALIGNED-RV64-V-LABEL: bcmp_size_15:
 ; CHECK-UNALIGNED-RV64-V:       # %bb.0: # %entry
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a2, 0(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a0, 7(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a3, 0(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a1, 7(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor a2, a2, a3
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor a0, a0, a1
-; CHECK-UNALIGNED-RV64-V-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-V-NEXT:    vsetivli zero, 15, e8, m1, ta, ma
+; CHECK-UNALIGNED-RV64-V-NEXT:    vle8.v v8, (a0)
+; CHECK-UNALIGNED-RV64-V-NEXT:    vle8.v v9, (a1)
+; CHECK-UNALIGNED-RV64-V-NEXT:    vmsne.vv v8, v8, v9
+; CHECK-UNALIGNED-RV64-V-NEXT:    vcpop.m a0, v8
 ; CHECK-UNALIGNED-RV64-V-NEXT:    snez a0, a0
 ; CHECK-UNALIGNED-RV64-V-NEXT:    ret
 entry:
@@ -1555,57 +1789,21 @@ define i32 @bcmp_size_31(ptr %s1, ptr %s2) nounwind {
 ;
 ; CHECK-UNALIGNED-RV32-V-LABEL: bcmp_size_31:
 ; CHECK-UNALIGNED-RV32-V:       # %bb.0: # %entry
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a2, 0(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a3, 4(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a4, 8(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a5, 12(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a6, 0(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a7, 4(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t0, 8(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t1, 12(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t2, 15(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t3, 19(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t4, 23(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a0, 27(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a3, a3, a7
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a5, a5, t1
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a7, 15(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t1, 19(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t5, 23(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a1, 27(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a2, a2, a6
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a4, a4, t0
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a6, t3, t1
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a0, a0, a1
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a1, t2, a7
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a7, t4, t5
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a4, a4, a7
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a1, a2, a1
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a0, a5, a0
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a2, a3, a6
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a0, a2, a0
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a1, a1, a4
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a0, a1, a0
+; CHECK-UNALIGNED-RV32-V-NEXT:    vsetivli zero, 31, e8, m2, ta, ma
+; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v8, (a0)
+; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v10, (a1)
+; CHECK-UNALIGNED-RV32-V-NEXT:    vmsne.vv v12, v8, v10
+; CHECK-UNALIGNED-RV32-V-NEXT:    vcpop.m a0, v12
 ; CHECK-UNALIGNED-RV32-V-NEXT:    snez a0, a0
 ; CHECK-UNALIGNED-RV32-V-NEXT:    ret
 ;
 ; CHECK-UNALIGNED-RV64-V-LABEL: bcmp_size_31:
 ; CHECK-UNALIGNED-RV64-V:       # %bb.0: # %entry
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a2, 0(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a3, 8(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a4, 15(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a0, 23(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a5, 0(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a6, 8(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a7, 15(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a1, 23(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor a2, a2, a5
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor a3, a3, a6
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor a4, a4, a7
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor a0, a0, a1
-; CHECK-UNALIGNED-RV64-V-NEXT:    or a0, a3, a0
-; CHECK-UNALIGNED-RV64-V-NEXT:    or a2, a2, a4
-; CHECK-UNALIGNED-RV64-V-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-V-NEXT:    vsetivli zero, 31, e8, m2, ta, ma
+; CHECK-UNALIGNED-RV64-V-NEXT:    vle8.v v8, (a0)
+; CHECK-UNALIGNED-RV64-V-NEXT:    vle8.v v10, (a1)
+; CHECK-UNALIGNED-RV64-V-NEXT:    vmsne.vv v12, v8, v10
+; CHECK-UNALIGNED-RV64-V-NEXT:    vcpop.m a0, v12
 ; CHECK-UNALIGNED-RV64-V-NEXT:    snez a0, a0
 ; CHECK-UNALIGNED-RV64-V-NEXT:    ret
 entry:
@@ -2109,129 +2307,23 @@ define i32 @bcmp_size_63(ptr %s1, ptr %s2) nounwind {
 ;
 ; CHECK-UNALIGNED-RV32-V-LABEL: bcmp_size_63:
 ; CHECK-UNALIGNED-RV32-V:       # %bb.0: # %entry
-; CHECK-UNALIGNED-RV32-V-NEXT:    addi sp, sp, -48
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw s0, 44(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw s1, 40(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw s2, 36(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw s3, 32(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw s4, 28(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw s5, 24(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw s6, 20(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw s7, 16(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw s8, 12(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw s9, 8(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw s10, 4(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a2, 16(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a3, 20(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a4, 24(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a5, 28(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a6, 0(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a7, 4(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t0, 8(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t1, 12(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t2, 16(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t3, 20(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t4, 24(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t5, 28(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t6, 0(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s0, 4(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s1, 8(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s2, 12(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s3, 47(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s4, 51(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s5, 55(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s6, 59(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s7, 31(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s8, 35(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s9, 39(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a0, 43(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor t1, t1, s2
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a5, a5, t5
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a7, a7, s0
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t5, 31(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s0, 35(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s2, 39(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s10, 43(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a3, a3, t3
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor t0, t0, s1
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a4, a4, t4
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t3, 47(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t4, 51(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s1, 55(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a1, 59(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a6, a6, t6
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a2, a2, t2
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a0, a0, s10
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a1, s6, a1
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor t2, s8, s0
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor t4, s4, t4
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor t6, s9, s2
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor s0, s5, s1
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor t5, s7, t5
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor t3, s3, t3
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a2, a2, t3
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a6, a6, t5
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a4, a4, s0
-; CHECK-UNALIGNED-RV32-V-NEXT:    or t0, t0, t6
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a3, a3, t4
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a7, a7, t2
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a1, a5, a1
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a0, t1, a0
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a0, a0, a1
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a1, a7, a3
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a3, t0, a4
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a2, a6, a2
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a0, a1, a0
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a2, a2, a3
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV32-V-NEXT:    li a2, 63
+; CHECK-UNALIGNED-RV32-V-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
+; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v8, (a0)
+; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v12, (a1)
+; CHECK-UNALIGNED-RV32-V-NEXT:    vmsne.vv v16, v8, v12
+; CHECK-UNALIGNED-RV32-V-NEXT:    vcpop.m a0, v16
 ; CHECK-UNALIGNED-RV32-V-NEXT:    snez a0, a0
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s0, 44(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s1, 40(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s2, 36(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s3, 32(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s4, 28(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s5, 24(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s6, 20(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s7, 16(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s8, 12(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s9, 8(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s10, 4(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    addi sp, sp, 48
 ; CHECK-UNALIGNED-RV32-V-NEXT:    ret
 ;
 ; CHECK-UNALIGNED-RV64-V-LABEL: bcmp_size_63:
 ; CHECK-UNALIGNED-RV64-V:       # %bb.0: # %entry
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a2, 0(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a3, 8(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a4, 16(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a5, 24(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a6, 0(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a7, 8(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld t0, 16(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld t1, 24(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld t2, 31(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld t3, 39(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld t4, 47(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a0, 55(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor a3, a3, a7
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor a5, a5, t1
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a7, 31(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld t1, 39(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld t5, 47(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a1, 55(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor a2, a2, a6
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor a4, a4, t0
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor a6, t3, t1
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor a0, a0, a1
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor a1, t2, a7
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor a7, t4, t5
-; CHECK-UNALIGNED-RV64-V-NEXT:    or a4, a4, a7
-; CHECK-UNALIGNED-RV64-V-NEXT:    or a1, a2, a1
-; CHECK-UNALIGNED-RV64-V-NEXT:    or a0, a5, a0
-; CHECK-UNALIGNED-RV64-V-NEXT:    or a2, a3, a6
-; CHECK-UNALIGNED-RV64-V-NEXT:    or a0, a2, a0
-; CHECK-UNALIGNED-RV64-V-NEXT:    or a1, a1, a4
-; CHECK-UNALIGNED-RV64-V-NEXT:    or a0, a1, a0
+; CHECK-UNALIGNED-RV64-V-NEXT:    li a2, 63
+; CHECK-UNALIGNED-RV64-V-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
+; CHECK-UNALIGNED-RV64-V-NEXT:    vle8.v v8, (a0)
+; CHECK-UNALIGNED-RV64-V-NEXT:    vle8.v v12, (a1)
+; CHECK-UNALIGNED-RV64-V-NEXT:    vmsne.vv v16, v8, v12
+; CHECK-UNALIGNED-RV64-V-NEXT:    vcpop.m a0, v16
 ; CHECK-UNALIGNED-RV64-V-NEXT:    snez a0, a0
 ; CHECK-UNALIGNED-RV64-V-NEXT:    ret
 entry:
@@ -2627,270 +2719,24 @@ define i32 @bcmp_size_127(ptr %s1, ptr %s2) nounwind {
 ;
 ; CHECK-UNALIGNED-RV32-V-LABEL: bcmp_size_127:
 ; CHECK-UNALIGNED-RV32-V:       # %bb.0: # %entry
-; CHECK-UNALIGNED-RV32-V-NEXT:    addi sp, sp, -96
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw ra, 92(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw s0, 88(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw s1, 84(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw s2, 80(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw s3, 76(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw s4, 72(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw s5, 68(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw s6, 64(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw s7, 60(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw s8, 56(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw s9, 52(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw s10, 48(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw s11, 44(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a5, 32(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t5, 36(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t4, 40(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a6, 44(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t2, 0(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t1, 4(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s0, 8(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a4, 12(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t3, 48(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t0, 52(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s2, 56(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a3, 60(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s1, 16(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a7, 20(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t6, 24(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a2, 28(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s4, 12(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s5, 60(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s6, 16(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s7, 20(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s8, 24(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s9, 28(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s3, 32(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s10, 36(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s11, 40(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw ra, 44(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a2, a2, s9
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw a2, 40(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a2, a3, s5
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw a2, 36(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a2, a4, s4
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw a2, 32(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s4, 56(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s5, 48(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s9, 52(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a2, a6, ra
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw a2, 28(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw ra, 4(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a2, a7, s7
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw a2, 24(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a2, t0, s9
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw a2, 20(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s7, 0(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s9, 8(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a2, t1, ra
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw a2, 16(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw ra, 107(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a2, t5, s10
-; CHECK-UNALIGNED-RV32-V-NEXT:    sw a2, 12(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s10, 75(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor t6, t6, s8
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s8, 123(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor s2, s2, s4
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor s0, s0, s9
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor t5, t4, s11
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s4, 83(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s9, 87(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s11, 91(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor s1, s1, s6
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s6, 107(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor t4, t3, s5
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s5, 91(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor t3, t2, s7
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s7, 123(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor t2, a5, s3
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s3, 75(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor s5, s11, s5
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor s7, s8, s7
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s8, 87(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s11, 83(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor s3, s10, s3
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s10, 115(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor s6, ra, s6
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw ra, 115(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor s4, s4, s11
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s11, 119(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a7, 119(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor s10, s10, ra
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw ra, 71(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a5, 67(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a2, 67(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a6, 71(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a4, 99(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a3, 99(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor t1, a5, a2
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a5, 103(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a2, 103(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor t0, a4, a3
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a4, s9, s8
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a7, s11, a7
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a6, ra, a6
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a3, a5, a2
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a5, 95(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s8, 63(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s9, 111(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a2, 79(a0)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s11, 79(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw ra, 111(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a0, 63(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw a1, 95(a1)
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a2, a2, s11
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor s9, s9, ra
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a0, s8, a0
-; CHECK-UNALIGNED-RV32-V-NEXT:    xor a1, a5, a1
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a1, t2, a1
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a0, t3, a0
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a5, t4, s9
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a2, s1, a2
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a3, t5, a3
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a6, s0, a6
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a7, s2, a7
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a4, t6, a4
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t2, 12(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    or t0, t2, t0
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t2, 16(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    or t1, t2, t1
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t2, 20(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    or t2, t2, s10
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t3, 24(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    or t3, t3, s4
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t4, 28(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    or t4, t4, s6
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t5, 32(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    or t5, t5, s3
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw t6, 36(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    or t6, t6, s7
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s0, 40(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    or s0, s0, s5
-; CHECK-UNALIGNED-RV32-V-NEXT:    or t6, s0, t6
-; CHECK-UNALIGNED-RV32-V-NEXT:    or t4, t5, t4
-; CHECK-UNALIGNED-RV32-V-NEXT:    or t2, t3, t2
-; CHECK-UNALIGNED-RV32-V-NEXT:    or t0, t1, t0
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a4, a4, a7
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a3, a6, a3
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a2, a2, a5
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a0, a0, a1
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a1, t4, t6
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a5, t0, t2
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a3, a3, a4
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a0, a0, a2
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a1, a5, a1
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a0, a0, a3
-; CHECK-UNALIGNED-RV32-V-NEXT:    or a0, a0, a1
+; CHECK-UNALIGNED-RV32-V-NEXT:    li a2, 127
+; CHECK-UNALIGNED-RV32-V-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v8, (a0)
+; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v16, (a1)
+; CHECK-UNALIGNED-RV32-V-NEXT:    vmsne.vv v24, v8, v16
+; CHECK-UNALIGNED-RV32-V-NEXT:    vcpop.m a0, v24
 ; CHECK-UNALIGNED-RV32-V-NEXT:    snez a0, a0
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw ra, 92(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s0, 88(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s1, 84(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s2, 80(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s3, 76(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s4, 72(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s5, 68(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s6, 64(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s7, 60(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s8, 56(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s9, 52(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s10, 48(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    lw s11, 44(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-V-NEXT:    addi sp, sp, 96
 ; CHECK-UNALIGNED-RV32-V-NEXT:    ret
 ;
 ; CHECK-UNALIGNED-RV64-V-LABEL: bcmp_size_127:
 ; CHECK-UNALIGNED-RV64-V:       # %bb.0: # %entry
-; CHECK-UNALIGNED-RV64-V-NEXT:    addi sp, sp, -96
-; CHECK-UNALIGNED-RV64-V-NEXT:    sd s0, 88(sp) # 8-byte Folded Spill
-; CHECK-UNALIGNED-RV64-V-NEXT:    sd s1, 80(sp) # 8-byte Folded Spill
-; CHECK-UNALIGNED-RV64-V-NEXT:    sd s2, 72(sp) # 8-byte Folded Spill
-; CHECK-UNALIGNED-RV64-V-NEXT:    sd s3, 64(sp) # 8-byte Folded Spill
-; CHECK-UNALIGNED-RV64-V-NEXT:    sd s4, 56(sp) # 8-byte Folded Spill
-; CHECK-UNALIGNED-RV64-V-NEXT:    sd s5, 48(sp) # 8-byte Folded Spill
-; CHECK-UNALIGNED-RV64-V-NEXT:    sd s6, 40(sp) # 8-byte Folded Spill
-; CHECK-UNALIGNED-RV64-V-NEXT:    sd s7, 32(sp) # 8-byte Folded Spill
-; CHECK-UNALIGNED-RV64-V-NEXT:    sd s8, 24(sp) # 8-byte Folded Spill
-; CHECK-UNALIGNED-RV64-V-NEXT:    sd s9, 16(sp) # 8-byte Folded Spill
-; CHECK-UNALIGNED-RV64-V-NEXT:    sd s10, 8(sp) # 8-byte Folded Spill
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a2, 32(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a3, 40(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a4, 48(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a5, 56(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a6, 0(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a7, 8(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld t0, 16(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld t1, 24(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld t2, 32(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld t3, 40(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld t4, 48(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld t5, 56(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld t6, 0(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld s0, 8(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld s1, 16(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld s2, 24(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld s3, 95(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld s4, 103(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld s5, 111(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld s6, 119(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld s7, 63(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld s8, 71(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld s9, 79(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a0, 87(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor t1, t1, s2
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor a5, a5, t5
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor a7, a7, s0
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld t5, 63(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld s0, 71(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld s2, 79(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld s10, 87(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor a3, a3, t3
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor t0, t0, s1
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor a4, a4, t4
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld t3, 95(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld t4, 103(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld s1, 111(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a1, 119(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor a6, a6, t6
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor a2, a2, t2
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor a0, a0, s10
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor a1, s6, a1
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor t2, s8, s0
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor t4, s4, t4
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor t6, s9, s2
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor s0, s5, s1
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor t5, s7, t5
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor t3, s3, t3
-; CHECK-UNALIGNED-RV64-V-NEXT:    or a2, a2, t3
-; CHECK-UNALIGNED-RV64-V-NEXT:    or a6, a6, t5
-; CHECK-UNALIGNED-RV64-V-NEXT:    or a4, a4, s0
-; CHECK-UNALIGNED-RV64-V-NEXT:    or t0, t0, t6
-; CHECK-UNALIGNED-RV64-V-NEXT:    or a3, a3, t4
-; CHECK-UNALIGNED-RV64-V-NEXT:    or a7, a7, t2
-; CHECK-UNALIGNED-RV64-V-NEXT:    or a1, a5, a1
-; CHECK-UNALIGNED-RV64-V-NEXT:    or a0, t1, a0
-; CHECK-UNALIGNED-RV64-V-NEXT:    or a0, a0, a1
-; CHECK-UNALIGNED-RV64-V-NEXT:    or a1, a7, a3
-; CHECK-UNALIGNED-RV64-V-NEXT:    or a3, t0, a4
-; CHECK-UNALIGNED-RV64-V-NEXT:    or a2, a6, a2
-; CHECK-UNALIGNED-RV64-V-NEXT:    or a0, a1, a0
-; CHECK-UNALIGNED-RV64-V-NEXT:    or a2, a2, a3
-; CHECK-UNALIGNED-RV64-V-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-V-NEXT:    li a2, 127
+; CHECK-UNALIGNED-RV64-V-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-UNALIGNED-RV64-V-NEXT:    vle8.v v8, (a0)
+; CHECK-UNALIGNED-RV64-V-NEXT:    vle8.v v16, (a1)
+; CHECK-UNALIGNED-RV64-V-NEXT:    vmsne.vv v24, v8, v16
+; CHECK-UNALIGNED-RV64-V-NEXT:    vcpop.m a0, v24
 ; CHECK-UNALIGNED-RV64-V-NEXT:    snez a0, a0
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld s0, 88(sp) # 8-byte Folded Reload
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld s1, 80(sp) # 8-byte Folded Reload
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld s2, 72(sp) # 8-byte Folded Reload
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld s3, 64(sp) # 8-byte Folded Reload
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld s4, 56(sp) # 8-byte Folded Reload
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld s5, 48(sp) # 8-byte Folded Reload
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld s6, 40(sp) # 8-byte Folded Reload
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld s7, 32(sp) # 8-byte Folded Reload
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld s8, 24(sp) # 8-byte Folded Reload
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld s9, 16(sp) # 8-byte Folded Reload
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld s10, 8(sp) # 8-byte Folded Reload
-; CHECK-UNALIGNED-RV64-V-NEXT:    addi sp, sp, 96
 ; CHECK-UNALIGNED-RV64-V-NEXT:    ret
 entry:
   %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iXLen 127)


        

