[llvm-branch-commits] [llvm] [RISCV] Support non-power-of-2 types when expanding memcmp (PR #114971)

Pengcheng Wang via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Wed Jun 18 01:11:11 PDT 2025


https://github.com/wangpc-pp updated https://github.com/llvm/llvm-project/pull/114971

From 3fd27bd1405a8b2c068786a200d610b9cacb65ef Mon Sep 17 00:00:00 2001
From: Wang Pengcheng <wangpengcheng.pp at bytedance.com>
Date: Tue, 5 Nov 2024 20:38:44 +0800
Subject: [PATCH 1/5] Set max bytes

Created using spr 1.3.6-beta.1
---
 llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index c65feb9755633..a1c5f76bae009 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -2508,7 +2508,10 @@ RISCVTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
     Options.LoadSizes = {4, 2, 1};
   if (IsZeroCmp && ST->hasVInstructions()) {
     unsigned VLenB = ST->getRealMinVLen() / 8;
-    for (unsigned Size = ST->getXLen() / 8 + 1;
+    // The minimum size should be the maximum bytes between `VLen * LMUL_MF8`
+    // and `XLen + 8`.
+    unsigned MinSize = std::max(VLenB / 8, ST->getXLen() / 8 + 1);
+    for (unsigned Size = MinSize;
          Size <= VLenB * ST->getMaxLMULForFixedLengthVectors(); Size++)
       Options.LoadSizes.insert(Options.LoadSizes.begin(), Size);
   }
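
For readers following along: `Options.LoadSizes` is consumed by the generic
MemCmpExpansion pass, which greedily picks the largest listed size that still
fits the remaining length, so the per-size front-insertion above keeps the
list sorted in descending order. A minimal standalone sketch of the resulting
list, assuming an RV64 core with VLen = 128 and a max LMUL of 8 (illustrative
values, not taken from this patch):

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    int main() {
      std::vector<unsigned> LoadSizes = {8, 4, 2, 1}; // scalar sizes, descending
      unsigned XLen = 64, VLenB = 128 / 8, MaxLMUL = 8; // assumed configuration
      unsigned MinSize = std::max(VLenB / 8, XLen / 8 + 1); // max(2, 9) == 9
      for (unsigned Size = MinSize; Size <= VLenB * MaxLMUL; Size++)
        LoadSizes.insert(LoadSizes.begin(), Size); // front-insert keeps order
      // LoadSizes is now {128, 127, ..., 9, 8, 4, 2, 1}.
      std::printf("%zu sizes, largest %u\n", LoadSizes.size(), LoadSizes[0]);
    }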

From 17115212f1d7af68f5374896d1ddadf464b2bc11 Mon Sep 17 00:00:00 2001
From: Wang Pengcheng <wangpengcheng.pp at bytedance.com>
Date: Fri, 13 Jun 2025 18:24:15 +0800
Subject: [PATCH 2/5] Change to XLen + 1

Created using spr 1.3.6-beta.1
---
 .../Target/RISCV/RISCVTargetTransformInfo.cpp |   4 +-
 llvm/test/CodeGen/RISCV/memcmp-optsize.ll     | 324 +++++++++++++++---
 llvm/test/CodeGen/RISCV/memcmp.ll             | 324 +++++++++++++++---
 3 files changed, 570 insertions(+), 82 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 4b9ea30a92c99..3aa0fcbb723a1 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -2956,8 +2956,8 @@ RISCVTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
   if (IsZeroCmp && ST->hasVInstructions()) {
     unsigned VLenB = ST->getRealMinVLen() / 8;
     // The minimum size should be the maximum bytes between `VLen * LMUL_MF8`
-    // and `XLen * 2`.
-    unsigned MinSize = std::max(VLenB / 8, ST->getXLen() * 2 / 8);
+    // and `XLen + 1`.
+    unsigned MinSize = std::max(VLenB / 8, ST->getXLen() / 8 + 1);
     for (unsigned Size = MinSize;
          Size <= VLenB * ST->getMaxLMULForFixedLengthVectors(); Size++)
       Options.LoadSizes.insert(Options.LoadSizes.begin(), Size);
diff --git a/llvm/test/CodeGen/RISCV/memcmp-optsize.ll b/llvm/test/CodeGen/RISCV/memcmp-optsize.ll
index d4d12a932d0ec..0d57e4201512e 100644
--- a/llvm/test/CodeGen/RISCV/memcmp-optsize.ll
+++ b/llvm/test/CodeGen/RISCV/memcmp-optsize.ll
@@ -517,17 +517,99 @@ define i32 @bcmp_size_5(ptr %s1, ptr %s2) nounwind optsize {
 ; CHECK-ALIGNED-RV64-V-NEXT:    addi sp, sp, 16
 ; CHECK-ALIGNED-RV64-V-NEXT:    ret
 ;
-; CHECK-UNALIGNED-LABEL: bcmp_size_5:
-; CHECK-UNALIGNED:       # %bb.0: # %entry
-; CHECK-UNALIGNED-NEXT:    lw a2, 0(a0)
-; CHECK-UNALIGNED-NEXT:    lbu a0, 4(a0)
-; CHECK-UNALIGNED-NEXT:    lw a3, 0(a1)
-; CHECK-UNALIGNED-NEXT:    lbu a1, 4(a1)
-; CHECK-UNALIGNED-NEXT:    xor a2, a2, a3
-; CHECK-UNALIGNED-NEXT:    xor a0, a0, a1
-; CHECK-UNALIGNED-NEXT:    or a0, a2, a0
-; CHECK-UNALIGNED-NEXT:    snez a0, a0
-; CHECK-UNALIGNED-NEXT:    ret
+; CHECK-UNALIGNED-RV32-LABEL: bcmp_size_5:
+; CHECK-UNALIGNED-RV32:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV32-NEXT:    lbu a0, 4(a0)
+; CHECK-UNALIGNED-RV32-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV32-NEXT:    lbu a1, 4(a1)
+; CHECK-UNALIGNED-RV32-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV32-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV32-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV32-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV32-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-LABEL: bcmp_size_5:
+; CHECK-UNALIGNED-RV64:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-NEXT:    lbu a0, 4(a0)
+; CHECK-UNALIGNED-RV64-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-NEXT:    lbu a1, 4(a1)
+; CHECK-UNALIGNED-RV64-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV64-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV32-ZBB-LABEL: bcmp_size_5:
+; CHECK-UNALIGNED-RV32-ZBB:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lbu a0, 4(a0)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lbu a1, 4(a1)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-ZBB-LABEL: bcmp_size_5:
+; CHECK-UNALIGNED-RV64-ZBB:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lbu a0, 4(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lbu a1, 4(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV32-ZBKB-LABEL: bcmp_size_5:
+; CHECK-UNALIGNED-RV32-ZBKB:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lbu a0, 4(a0)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lbu a1, 4(a1)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-ZBKB-LABEL: bcmp_size_5:
+; CHECK-UNALIGNED-RV64-ZBKB:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lbu a0, 4(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lbu a1, 4(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV32-V-LABEL: bcmp_size_5:
+; CHECK-UNALIGNED-RV32-V:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-V-NEXT:    vsetivli zero, 5, e8, mf2, ta, ma
+; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v8, (a0)
+; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v9, (a1)
+; CHECK-UNALIGNED-RV32-V-NEXT:    vmsne.vv v8, v8, v9
+; CHECK-UNALIGNED-RV32-V-NEXT:    vcpop.m a0, v8
+; CHECK-UNALIGNED-RV32-V-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV32-V-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-V-LABEL: bcmp_size_5:
+; CHECK-UNALIGNED-RV64-V:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-V-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-V-NEXT:    lbu a0, 4(a0)
+; CHECK-UNALIGNED-RV64-V-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-V-NEXT:    lbu a1, 4(a1)
+; CHECK-UNALIGNED-RV64-V-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-V-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-V-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-V-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV64-V-NEXT:    ret
 entry:
   %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iXLen 5)
   ret i32 %bcmp
@@ -614,17 +696,99 @@ define i32 @bcmp_size_6(ptr %s1, ptr %s2) nounwind optsize {
 ; CHECK-ALIGNED-RV64-V-NEXT:    addi sp, sp, 16
 ; CHECK-ALIGNED-RV64-V-NEXT:    ret
 ;
-; CHECK-UNALIGNED-LABEL: bcmp_size_6:
-; CHECK-UNALIGNED:       # %bb.0: # %entry
-; CHECK-UNALIGNED-NEXT:    lw a2, 0(a0)
-; CHECK-UNALIGNED-NEXT:    lhu a0, 4(a0)
-; CHECK-UNALIGNED-NEXT:    lw a3, 0(a1)
-; CHECK-UNALIGNED-NEXT:    lhu a1, 4(a1)
-; CHECK-UNALIGNED-NEXT:    xor a2, a2, a3
-; CHECK-UNALIGNED-NEXT:    xor a0, a0, a1
-; CHECK-UNALIGNED-NEXT:    or a0, a2, a0
-; CHECK-UNALIGNED-NEXT:    snez a0, a0
-; CHECK-UNALIGNED-NEXT:    ret
+; CHECK-UNALIGNED-RV32-LABEL: bcmp_size_6:
+; CHECK-UNALIGNED-RV32:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV32-NEXT:    lhu a0, 4(a0)
+; CHECK-UNALIGNED-RV32-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV32-NEXT:    lhu a1, 4(a1)
+; CHECK-UNALIGNED-RV32-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV32-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV32-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV32-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV32-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-LABEL: bcmp_size_6:
+; CHECK-UNALIGNED-RV64:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-NEXT:    lhu a0, 4(a0)
+; CHECK-UNALIGNED-RV64-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-NEXT:    lhu a1, 4(a1)
+; CHECK-UNALIGNED-RV64-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV64-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV32-ZBB-LABEL: bcmp_size_6:
+; CHECK-UNALIGNED-RV32-ZBB:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lhu a0, 4(a0)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lhu a1, 4(a1)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-ZBB-LABEL: bcmp_size_6:
+; CHECK-UNALIGNED-RV64-ZBB:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lhu a0, 4(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lhu a1, 4(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV32-ZBKB-LABEL: bcmp_size_6:
+; CHECK-UNALIGNED-RV32-ZBKB:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lhu a0, 4(a0)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lhu a1, 4(a1)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-ZBKB-LABEL: bcmp_size_6:
+; CHECK-UNALIGNED-RV64-ZBKB:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lhu a0, 4(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lhu a1, 4(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV32-V-LABEL: bcmp_size_6:
+; CHECK-UNALIGNED-RV32-V:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-V-NEXT:    vsetivli zero, 6, e8, mf2, ta, ma
+; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v8, (a0)
+; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v9, (a1)
+; CHECK-UNALIGNED-RV32-V-NEXT:    vmsne.vv v8, v8, v9
+; CHECK-UNALIGNED-RV32-V-NEXT:    vcpop.m a0, v8
+; CHECK-UNALIGNED-RV32-V-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV32-V-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-V-LABEL: bcmp_size_6:
+; CHECK-UNALIGNED-RV64-V:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-V-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-V-NEXT:    lhu a0, 4(a0)
+; CHECK-UNALIGNED-RV64-V-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-V-NEXT:    lhu a1, 4(a1)
+; CHECK-UNALIGNED-RV64-V-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-V-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-V-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-V-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV64-V-NEXT:    ret
 entry:
   %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iXLen 6)
   ret i32 %bcmp
@@ -711,17 +875,99 @@ define i32 @bcmp_size_7(ptr %s1, ptr %s2) nounwind optsize {
 ; CHECK-ALIGNED-RV64-V-NEXT:    addi sp, sp, 16
 ; CHECK-ALIGNED-RV64-V-NEXT:    ret
 ;
-; CHECK-UNALIGNED-LABEL: bcmp_size_7:
-; CHECK-UNALIGNED:       # %bb.0: # %entry
-; CHECK-UNALIGNED-NEXT:    lw a2, 0(a0)
-; CHECK-UNALIGNED-NEXT:    lw a0, 3(a0)
-; CHECK-UNALIGNED-NEXT:    lw a3, 0(a1)
-; CHECK-UNALIGNED-NEXT:    lw a1, 3(a1)
-; CHECK-UNALIGNED-NEXT:    xor a2, a2, a3
-; CHECK-UNALIGNED-NEXT:    xor a0, a0, a1
-; CHECK-UNALIGNED-NEXT:    or a0, a2, a0
-; CHECK-UNALIGNED-NEXT:    snez a0, a0
-; CHECK-UNALIGNED-NEXT:    ret
+; CHECK-UNALIGNED-RV32-LABEL: bcmp_size_7:
+; CHECK-UNALIGNED-RV32:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV32-NEXT:    lw a0, 3(a0)
+; CHECK-UNALIGNED-RV32-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV32-NEXT:    lw a1, 3(a1)
+; CHECK-UNALIGNED-RV32-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV32-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV32-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV32-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV32-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-LABEL: bcmp_size_7:
+; CHECK-UNALIGNED-RV64:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-NEXT:    lw a0, 3(a0)
+; CHECK-UNALIGNED-RV64-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-NEXT:    lw a1, 3(a1)
+; CHECK-UNALIGNED-RV64-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV64-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV32-ZBB-LABEL: bcmp_size_7:
+; CHECK-UNALIGNED-RV32-ZBB:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lw a0, 3(a0)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lw a1, 3(a1)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-ZBB-LABEL: bcmp_size_7:
+; CHECK-UNALIGNED-RV64-ZBB:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lw a0, 3(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lw a1, 3(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV32-ZBKB-LABEL: bcmp_size_7:
+; CHECK-UNALIGNED-RV32-ZBKB:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lw a0, 3(a0)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lw a1, 3(a1)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-ZBKB-LABEL: bcmp_size_7:
+; CHECK-UNALIGNED-RV64-ZBKB:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lw a0, 3(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lw a1, 3(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV32-V-LABEL: bcmp_size_7:
+; CHECK-UNALIGNED-RV32-V:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-V-NEXT:    vsetivli zero, 7, e8, mf2, ta, ma
+; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v8, (a0)
+; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v9, (a1)
+; CHECK-UNALIGNED-RV32-V-NEXT:    vmsne.vv v8, v8, v9
+; CHECK-UNALIGNED-RV32-V-NEXT:    vcpop.m a0, v8
+; CHECK-UNALIGNED-RV32-V-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV32-V-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-V-LABEL: bcmp_size_7:
+; CHECK-UNALIGNED-RV64-V:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-V-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-V-NEXT:    lw a0, 3(a0)
+; CHECK-UNALIGNED-RV64-V-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-V-NEXT:    lw a1, 3(a1)
+; CHECK-UNALIGNED-RV64-V-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-V-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-V-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-V-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV64-V-NEXT:    ret
 entry:
   %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iXLen 7)
   ret i32 %bcmp
@@ -1079,13 +1325,11 @@ define i32 @bcmp_size_15(ptr %s1, ptr %s2) nounwind optsize {
 ;
 ; CHECK-UNALIGNED-RV64-V-LABEL: bcmp_size_15:
 ; CHECK-UNALIGNED-RV64-V:       # %bb.0: # %entry
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a2, 0(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a0, 7(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a3, 0(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a1, 7(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor a2, a2, a3
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor a0, a0, a1
-; CHECK-UNALIGNED-RV64-V-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-V-NEXT:    vsetivli zero, 15, e8, m1, ta, ma
+; CHECK-UNALIGNED-RV64-V-NEXT:    vle8.v v8, (a0)
+; CHECK-UNALIGNED-RV64-V-NEXT:    vle8.v v9, (a1)
+; CHECK-UNALIGNED-RV64-V-NEXT:    vmsne.vv v8, v8, v9
+; CHECK-UNALIGNED-RV64-V-NEXT:    vcpop.m a0, v8
 ; CHECK-UNALIGNED-RV64-V-NEXT:    snez a0, a0
 ; CHECK-UNALIGNED-RV64-V-NEXT:    ret
 entry:
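
A note on the check-line churn in these test files: the shared
CHECK-UNALIGNED prefix had to be split per configuration because the RV32 and
RV64 vector targets now diverge for sizes 5-7. With the new lower bound of
XLen/8 + 1 bytes, RV32-V (MinSize = 5) expands bcmp of 5, 6, and 7 bytes
through the vector path, while RV64-V (MinSize = 9) keeps the scalar
load/xor/or sequence for those sizes and only switches to vectors for cases
like bcmp_size_15 above. As a sketch, the expanded
vsetivli/vle8/vmsne/vcpop/snez sequence computes the same thing as this
scalar model (illustrative code, not from the patch):

    #include <cstddef>
    #include <cstdint>

    // vle8.v loads n bytes from each side, vmsne.vv marks mismatching
    // lanes, vcpop.m counts the set mask bits, snez collapses to 0/1.
    int bcmp_is_nonzero(const uint8_t *s1, const uint8_t *s2, size_t n) {
      unsigned mismatches = 0;            // vcpop.m result
      for (size_t i = 0; i < n; ++i)
        mismatches += (s1[i] != s2[i]);   // vmsne.vv per-lane compare
      return mismatches != 0;             // snez a0, a0
    }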
diff --git a/llvm/test/CodeGen/RISCV/memcmp.ll b/llvm/test/CodeGen/RISCV/memcmp.ll
index f57dc207c625e..0caab1f5ce2f0 100644
--- a/llvm/test/CodeGen/RISCV/memcmp.ll
+++ b/llvm/test/CodeGen/RISCV/memcmp.ll
@@ -517,17 +517,99 @@ define i32 @bcmp_size_5(ptr %s1, ptr %s2) nounwind {
 ; CHECK-ALIGNED-RV64-V-NEXT:    addi sp, sp, 16
 ; CHECK-ALIGNED-RV64-V-NEXT:    ret
 ;
-; CHECK-UNALIGNED-LABEL: bcmp_size_5:
-; CHECK-UNALIGNED:       # %bb.0: # %entry
-; CHECK-UNALIGNED-NEXT:    lw a2, 0(a0)
-; CHECK-UNALIGNED-NEXT:    lbu a0, 4(a0)
-; CHECK-UNALIGNED-NEXT:    lw a3, 0(a1)
-; CHECK-UNALIGNED-NEXT:    lbu a1, 4(a1)
-; CHECK-UNALIGNED-NEXT:    xor a2, a2, a3
-; CHECK-UNALIGNED-NEXT:    xor a0, a0, a1
-; CHECK-UNALIGNED-NEXT:    or a0, a2, a0
-; CHECK-UNALIGNED-NEXT:    snez a0, a0
-; CHECK-UNALIGNED-NEXT:    ret
+; CHECK-UNALIGNED-RV32-LABEL: bcmp_size_5:
+; CHECK-UNALIGNED-RV32:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV32-NEXT:    lbu a0, 4(a0)
+; CHECK-UNALIGNED-RV32-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV32-NEXT:    lbu a1, 4(a1)
+; CHECK-UNALIGNED-RV32-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV32-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV32-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV32-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV32-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-LABEL: bcmp_size_5:
+; CHECK-UNALIGNED-RV64:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-NEXT:    lbu a0, 4(a0)
+; CHECK-UNALIGNED-RV64-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-NEXT:    lbu a1, 4(a1)
+; CHECK-UNALIGNED-RV64-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV64-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV32-ZBB-LABEL: bcmp_size_5:
+; CHECK-UNALIGNED-RV32-ZBB:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lbu a0, 4(a0)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lbu a1, 4(a1)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-ZBB-LABEL: bcmp_size_5:
+; CHECK-UNALIGNED-RV64-ZBB:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lbu a0, 4(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lbu a1, 4(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV32-ZBKB-LABEL: bcmp_size_5:
+; CHECK-UNALIGNED-RV32-ZBKB:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lbu a0, 4(a0)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lbu a1, 4(a1)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-ZBKB-LABEL: bcmp_size_5:
+; CHECK-UNALIGNED-RV64-ZBKB:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lbu a0, 4(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lbu a1, 4(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV32-V-LABEL: bcmp_size_5:
+; CHECK-UNALIGNED-RV32-V:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-V-NEXT:    vsetivli zero, 5, e8, mf2, ta, ma
+; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v8, (a0)
+; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v9, (a1)
+; CHECK-UNALIGNED-RV32-V-NEXT:    vmsne.vv v8, v8, v9
+; CHECK-UNALIGNED-RV32-V-NEXT:    vcpop.m a0, v8
+; CHECK-UNALIGNED-RV32-V-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV32-V-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-V-LABEL: bcmp_size_5:
+; CHECK-UNALIGNED-RV64-V:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-V-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-V-NEXT:    lbu a0, 4(a0)
+; CHECK-UNALIGNED-RV64-V-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-V-NEXT:    lbu a1, 4(a1)
+; CHECK-UNALIGNED-RV64-V-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-V-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-V-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-V-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV64-V-NEXT:    ret
 entry:
   %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iXLen 5)
   ret i32 %bcmp
@@ -614,17 +696,99 @@ define i32 @bcmp_size_6(ptr %s1, ptr %s2) nounwind {
 ; CHECK-ALIGNED-RV64-V-NEXT:    addi sp, sp, 16
 ; CHECK-ALIGNED-RV64-V-NEXT:    ret
 ;
-; CHECK-UNALIGNED-LABEL: bcmp_size_6:
-; CHECK-UNALIGNED:       # %bb.0: # %entry
-; CHECK-UNALIGNED-NEXT:    lw a2, 0(a0)
-; CHECK-UNALIGNED-NEXT:    lhu a0, 4(a0)
-; CHECK-UNALIGNED-NEXT:    lw a3, 0(a1)
-; CHECK-UNALIGNED-NEXT:    lhu a1, 4(a1)
-; CHECK-UNALIGNED-NEXT:    xor a2, a2, a3
-; CHECK-UNALIGNED-NEXT:    xor a0, a0, a1
-; CHECK-UNALIGNED-NEXT:    or a0, a2, a0
-; CHECK-UNALIGNED-NEXT:    snez a0, a0
-; CHECK-UNALIGNED-NEXT:    ret
+; CHECK-UNALIGNED-RV32-LABEL: bcmp_size_6:
+; CHECK-UNALIGNED-RV32:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV32-NEXT:    lhu a0, 4(a0)
+; CHECK-UNALIGNED-RV32-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV32-NEXT:    lhu a1, 4(a1)
+; CHECK-UNALIGNED-RV32-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV32-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV32-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV32-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV32-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-LABEL: bcmp_size_6:
+; CHECK-UNALIGNED-RV64:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-NEXT:    lhu a0, 4(a0)
+; CHECK-UNALIGNED-RV64-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-NEXT:    lhu a1, 4(a1)
+; CHECK-UNALIGNED-RV64-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV64-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV32-ZBB-LABEL: bcmp_size_6:
+; CHECK-UNALIGNED-RV32-ZBB:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lhu a0, 4(a0)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lhu a1, 4(a1)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-ZBB-LABEL: bcmp_size_6:
+; CHECK-UNALIGNED-RV64-ZBB:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lhu a0, 4(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lhu a1, 4(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV32-ZBKB-LABEL: bcmp_size_6:
+; CHECK-UNALIGNED-RV32-ZBKB:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lhu a0, 4(a0)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lhu a1, 4(a1)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-ZBKB-LABEL: bcmp_size_6:
+; CHECK-UNALIGNED-RV64-ZBKB:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lhu a0, 4(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lhu a1, 4(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV32-V-LABEL: bcmp_size_6:
+; CHECK-UNALIGNED-RV32-V:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-V-NEXT:    vsetivli zero, 6, e8, mf2, ta, ma
+; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v8, (a0)
+; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v9, (a1)
+; CHECK-UNALIGNED-RV32-V-NEXT:    vmsne.vv v8, v8, v9
+; CHECK-UNALIGNED-RV32-V-NEXT:    vcpop.m a0, v8
+; CHECK-UNALIGNED-RV32-V-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV32-V-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-V-LABEL: bcmp_size_6:
+; CHECK-UNALIGNED-RV64-V:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-V-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-V-NEXT:    lhu a0, 4(a0)
+; CHECK-UNALIGNED-RV64-V-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-V-NEXT:    lhu a1, 4(a1)
+; CHECK-UNALIGNED-RV64-V-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-V-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-V-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-V-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV64-V-NEXT:    ret
 entry:
   %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iXLen 6)
   ret i32 %bcmp
@@ -711,17 +875,99 @@ define i32 @bcmp_size_7(ptr %s1, ptr %s2) nounwind {
 ; CHECK-ALIGNED-RV64-V-NEXT:    addi sp, sp, 16
 ; CHECK-ALIGNED-RV64-V-NEXT:    ret
 ;
-; CHECK-UNALIGNED-LABEL: bcmp_size_7:
-; CHECK-UNALIGNED:       # %bb.0: # %entry
-; CHECK-UNALIGNED-NEXT:    lw a2, 0(a0)
-; CHECK-UNALIGNED-NEXT:    lw a0, 3(a0)
-; CHECK-UNALIGNED-NEXT:    lw a3, 0(a1)
-; CHECK-UNALIGNED-NEXT:    lw a1, 3(a1)
-; CHECK-UNALIGNED-NEXT:    xor a2, a2, a3
-; CHECK-UNALIGNED-NEXT:    xor a0, a0, a1
-; CHECK-UNALIGNED-NEXT:    or a0, a2, a0
-; CHECK-UNALIGNED-NEXT:    snez a0, a0
-; CHECK-UNALIGNED-NEXT:    ret
+; CHECK-UNALIGNED-RV32-LABEL: bcmp_size_7:
+; CHECK-UNALIGNED-RV32:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV32-NEXT:    lw a0, 3(a0)
+; CHECK-UNALIGNED-RV32-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV32-NEXT:    lw a1, 3(a1)
+; CHECK-UNALIGNED-RV32-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV32-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV32-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV32-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV32-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-LABEL: bcmp_size_7:
+; CHECK-UNALIGNED-RV64:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-NEXT:    lw a0, 3(a0)
+; CHECK-UNALIGNED-RV64-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-NEXT:    lw a1, 3(a1)
+; CHECK-UNALIGNED-RV64-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV64-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV32-ZBB-LABEL: bcmp_size_7:
+; CHECK-UNALIGNED-RV32-ZBB:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lw a0, 3(a0)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lw a1, 3(a1)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-ZBB-LABEL: bcmp_size_7:
+; CHECK-UNALIGNED-RV64-ZBB:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lw a0, 3(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lw a1, 3(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV32-ZBKB-LABEL: bcmp_size_7:
+; CHECK-UNALIGNED-RV32-ZBKB:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lw a0, 3(a0)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lw a1, 3(a1)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-ZBKB-LABEL: bcmp_size_7:
+; CHECK-UNALIGNED-RV64-ZBKB:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lw a0, 3(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lw a1, 3(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV32-V-LABEL: bcmp_size_7:
+; CHECK-UNALIGNED-RV32-V:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-V-NEXT:    vsetivli zero, 7, e8, mf2, ta, ma
+; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v8, (a0)
+; CHECK-UNALIGNED-RV32-V-NEXT:    vle8.v v9, (a1)
+; CHECK-UNALIGNED-RV32-V-NEXT:    vmsne.vv v8, v8, v9
+; CHECK-UNALIGNED-RV32-V-NEXT:    vcpop.m a0, v8
+; CHECK-UNALIGNED-RV32-V-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV32-V-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-V-LABEL: bcmp_size_7:
+; CHECK-UNALIGNED-RV64-V:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-V-NEXT:    lw a2, 0(a0)
+; CHECK-UNALIGNED-RV64-V-NEXT:    lw a0, 3(a0)
+; CHECK-UNALIGNED-RV64-V-NEXT:    lw a3, 0(a1)
+; CHECK-UNALIGNED-RV64-V-NEXT:    lw a1, 3(a1)
+; CHECK-UNALIGNED-RV64-V-NEXT:    xor a2, a2, a3
+; CHECK-UNALIGNED-RV64-V-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-V-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-V-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV64-V-NEXT:    ret
 entry:
   %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iXLen 7)
   ret i32 %bcmp
@@ -1079,13 +1325,11 @@ define i32 @bcmp_size_15(ptr %s1, ptr %s2) nounwind {
 ;
 ; CHECK-UNALIGNED-RV64-V-LABEL: bcmp_size_15:
 ; CHECK-UNALIGNED-RV64-V:       # %bb.0: # %entry
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a2, 0(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a0, 7(a0)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a3, 0(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    ld a1, 7(a1)
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor a2, a2, a3
-; CHECK-UNALIGNED-RV64-V-NEXT:    xor a0, a0, a1
-; CHECK-UNALIGNED-RV64-V-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-V-NEXT:    vsetivli zero, 15, e8, m1, ta, ma
+; CHECK-UNALIGNED-RV64-V-NEXT:    vle8.v v8, (a0)
+; CHECK-UNALIGNED-RV64-V-NEXT:    vle8.v v9, (a1)
+; CHECK-UNALIGNED-RV64-V-NEXT:    vmsne.vv v8, v8, v9
+; CHECK-UNALIGNED-RV64-V-NEXT:    vcpop.m a0, v8
 ; CHECK-UNALIGNED-RV64-V-NEXT:    snez a0, a0
 ; CHECK-UNALIGNED-RV64-V-NEXT:    ret
 entry:

From 3fb6d8a32231e8b77f926eda89886f47d77c3e1d Mon Sep 17 00:00:00 2001
From: Wang Pengcheng <wangpengcheng.pp at bytedance.com>
Date: Mon, 16 Jun 2025 12:48:40 +0800
Subject: [PATCH 3/5] MinSize = XLen + 1

Created using spr 1.3.6-beta.1
---
 llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 3aa0fcbb723a1..1b0e95df6a79e 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -2955,9 +2955,8 @@ RISCVTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
 
   if (IsZeroCmp && ST->hasVInstructions()) {
     unsigned VLenB = ST->getRealMinVLen() / 8;
-    // The minimum size should be the maximum bytes between `VLen * LMUL_MF8`
-    // and `XLen + 1`.
-    unsigned MinSize = std::max(VLenB / 8, ST->getXLen() / 8 + 1);
+    // The minimum size should be `XLen + 1`.
+    unsigned MinSize = ST->getXLen() / 8 + 1;
     for (unsigned Size = MinSize;
          Size <= VLenB * ST->getMaxLMULForFixedLengthVectors(); Size++)
       Options.LoadSizes.insert(Options.LoadSizes.begin(), Size);

From bd11a42f765876d5861da83a096def58731a678e Mon Sep 17 00:00:00 2001
From: Wang Pengcheng <wangpengcheng.pp at bytedance.com>
Date: Tue, 17 Jun 2025 12:04:26 +0800
Subject: [PATCH 4/5] Update comment and simplify code

Created using spr 1.3.6-beta.1
---
 llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 4db1bb600a96a..c19a91c0a0ace 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -2995,10 +2995,11 @@ RISCVTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
 
   if (IsZeroCmp && ST->hasVInstructions()) {
     unsigned VLenB = ST->getRealMinVLen() / 8;
-    // The minimum size should be `XLen / 8 + 1`.
+    // The minimum size should be `XLen / 8 + 1`, and the maximum size should be
+    // `VLenB * MaxLMUL` so that it fits in a single register group.
     unsigned MinSize = ST->getXLen() / 8 + 1;
-    for (unsigned Size = MinSize;
-         Size <= VLenB * ST->getMaxLMULForFixedLengthVectors(); Size++)
+    unsigned MaxSize = VLenB * ST->getMaxLMULForFixedLengthVectors();
+    for (unsigned Size = MinSize; Size <= MaxSize; Size++)
       Options.LoadSizes.insert(Options.LoadSizes.begin(), Size);
   }
   return Options;
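
With this cleanup the bounds read directly off the code: everything up to
XLen/8 bytes is left to the scalar LoadSizes, and the vector path stops at
one register group of the largest allowed LMUL. A hedged helper mirroring the
logic (the name and example numbers are illustrative, not from the patch):

    // Returns true when a zero-compare of SizeInBytes would go down the
    // fixed-length vector path under the bounds above.
    bool usesVectorZeroCmp(unsigned SizeInBytes, unsigned XLen,
                           unsigned VLenB, unsigned MaxLMUL) {
      unsigned MinSize = XLen / 8 + 1;    // first non-scalar size
      unsigned MaxSize = VLenB * MaxLMUL; // one register group
      return SizeInBytes >= MinSize && SizeInBytes <= MaxSize;
    }
    // e.g. XLen=64, VLenB=16, MaxLMUL=8: sizes 9..128 take the vector
    // path, which is why bcmp_size_15 above becomes a single
    // vle8/vmsne/vcpop sequence.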

From 49d897bd3a72a3326c16632b6fc82bae7e691c5f Mon Sep 17 00:00:00 2001
From: Wang Pengcheng <wangpengcheng.pp at bytedance.com>
Date: Tue, 17 Jun 2025 18:17:26 +0800
Subject: [PATCH 5/5] Bail out for non-byte-size types

Created using spr 1.3.6-beta.1
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   |  5 ++-
 .../test/CodeGen/RISCV/icmp-non-byte-sized.ll | 41 +++++++++++++++++++
 2 files changed, 45 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/RISCV/icmp-non-byte-sized.ll

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index ca43433c6e816..997fa7e04f9d6 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -16211,7 +16211,10 @@ combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC,
           Attribute::NoImplicitFloat))
     return SDValue();
 
-  assert(OpSize % 8 == 0 && "The size should be a multiple of 8");
+  // Bail out for non-byte-sized types.
+  if (!OpVT.isByteSized())
+    return SDValue();
+
   unsigned VecSize = OpSize / 8;
   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, VecSize);
   EVT CmpVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, VecSize);
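
The old assert can fire for compares like the i127 one in the new test:
EVT::isByteSized() is a multiple-of-8 check on the bit width, and without the
early return the `OpSize / 8` division below would silently truncate. A small
sketch of the hazard (illustrative, not from the patch):

    #include <cassert>

    int main() {
      unsigned OpSize = 127;         // bit width of i127
      assert(OpSize % 8 != 0);       // i.e. !OpVT.isByteSized()
      unsigned VecSize = OpSize / 8; // truncates to 15 bytes
      assert(VecSize * 8 == 120);    // 7 of 127 bits would be dropped
    }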
diff --git a/llvm/test/CodeGen/RISCV/icmp-non-byte-sized.ll b/llvm/test/CodeGen/RISCV/icmp-non-byte-sized.ll
new file mode 100644
index 0000000000000..fca6238548aab
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/icmp-non-byte-sized.ll
@@ -0,0 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv32 -mattr=+v -O2 < %s | FileCheck %s --check-prefix=CHECK-RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -O2 < %s | FileCheck %s --check-prefix=CHECK-RV64
+
+define i1 @icmp_non_byte_type(ptr %p1, ptr %p2) nounwind {
+; CHECK-RV32-LABEL: icmp_non_byte_type:
+; CHECK-RV32:       # %bb.0:
+; CHECK-RV32-NEXT:    lw a2, 0(a0)
+; CHECK-RV32-NEXT:    lw a3, 4(a0)
+; CHECK-RV32-NEXT:    lw a4, 8(a0)
+; CHECK-RV32-NEXT:    lw a0, 12(a0)
+; CHECK-RV32-NEXT:    lw a5, 12(a1)
+; CHECK-RV32-NEXT:    lw a6, 4(a1)
+; CHECK-RV32-NEXT:    lw a7, 8(a1)
+; CHECK-RV32-NEXT:    lw a1, 0(a1)
+; CHECK-RV32-NEXT:    xor a0, a0, a5
+; CHECK-RV32-NEXT:    xor a3, a3, a6
+; CHECK-RV32-NEXT:    xor a4, a4, a7
+; CHECK-RV32-NEXT:    xor a1, a2, a1
+; CHECK-RV32-NEXT:    or a0, a3, a0
+; CHECK-RV32-NEXT:    or a1, a1, a4
+; CHECK-RV32-NEXT:    or a0, a1, a0
+; CHECK-RV32-NEXT:    seqz a0, a0
+; CHECK-RV32-NEXT:    ret
+;
+; CHECK-RV64-LABEL: icmp_non_byte_type:
+; CHECK-RV64:       # %bb.0:
+; CHECK-RV64-NEXT:    ld a2, 0(a0)
+; CHECK-RV64-NEXT:    ld a0, 8(a0)
+; CHECK-RV64-NEXT:    ld a3, 8(a1)
+; CHECK-RV64-NEXT:    ld a1, 0(a1)
+; CHECK-RV64-NEXT:    xor a0, a0, a3
+; CHECK-RV64-NEXT:    xor a1, a2, a1
+; CHECK-RV64-NEXT:    or a0, a1, a0
+; CHECK-RV64-NEXT:    seqz a0, a0
+; CHECK-RV64-NEXT:    ret
+  %v1 = load i127, ptr %p1
+  %v2 = load i127, ptr %p2
+  %ret = icmp eq i127 %v1, %v2
+  ret i1 %ret
+}


