[llvm] [RISCV] Allow tail memcmp expansion (PR #121460)

Pengcheng Wang via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 2 01:07:47 PST 2025


https://github.com/wangpc-pp updated https://github.com/llvm/llvm-project/pull/121460

>From ffa03c3610ae384a765fff4e495dae2c6b33b4b9 Mon Sep 17 00:00:00 2001
From: Wang Pengcheng <wangpengcheng.pp at bytedance.com>
Date: Thu, 2 Jan 2025 17:02:30 +0800
Subject: [PATCH] [RISCV] Allow tail memcmp expansion

The tail expansion optimization (`AllowedTailExpansions`) was
introduced by #70469.

Like AArch64, we allow tail expansions for memcmp sizes of 3 bytes
on RV32 and 3, 5, or 6 bytes on RV64.

This can simplify the comparison and reduce the number of basic blocks.
---
 .../Target/RISCV/RISCVTargetTransformInfo.cpp |   7 +-
 llvm/test/CodeGen/RISCV/memcmp-optsize.ll     | 234 ++++++++----------
 llvm/test/CodeGen/RISCV/memcmp.ll             | 234 ++++++++----------
 3 files changed, 201 insertions(+), 274 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 0abb270edcabc8..4256605dca894f 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -2565,9 +2565,12 @@ RISCVTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
   Options.AllowOverlappingLoads = true;
   Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
   Options.NumLoadsPerBlock = Options.MaxNumLoads;
-  if (ST->is64Bit())
+  if (ST->is64Bit()) {
     Options.LoadSizes = {8, 4, 2, 1};
-  else
+    Options.AllowedTailExpansions = {3, 5, 6};
+  } else {
     Options.LoadSizes = {4, 2, 1};
+    Options.AllowedTailExpansions = {3};
+  }
   return Options;
 }
diff --git a/llvm/test/CodeGen/RISCV/memcmp-optsize.ll b/llvm/test/CodeGen/RISCV/memcmp-optsize.ll
index d529ae6ecd0aba..b9a27b9d0c9e70 100644
--- a/llvm/test/CodeGen/RISCV/memcmp-optsize.ll
+++ b/llvm/test/CodeGen/RISCV/memcmp-optsize.ll
@@ -2449,82 +2449,72 @@ define i32 @memcmp_size_3(ptr %s1, ptr %s2) nounwind optsize {
 ;
 ; CHECK-UNALIGNED-RV32-ZBB-LABEL: memcmp_size_3:
 ; CHECK-UNALIGNED-RV32-ZBB:       # %bb.0: # %entry
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lh a2, 0(a0)
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lh a3, 0(a1)
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:    rev8 a2, a2
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:    rev8 a3, a3
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:    srli a2, a2, 16
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:    srli a3, a3, 16
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:    bne a2, a3, .LBB24_2
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:  # %bb.1: # %loadbb1
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lbu a0, 2(a0)
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lbu a1, 2(a1)
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:    sub a0, a0, a1
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:    ret
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:  .LBB24_2: # %res_block
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:    sltu a0, a2, a3
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:    neg a0, a0
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:    ori a0, a0, 1
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lbu a2, 2(a0)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lhu a0, 0(a0)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lbu a3, 2(a1)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lhu a1, 0(a1)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    slli a2, a2, 16
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    or a0, a0, a2
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    slli a3, a3, 16
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    or a1, a1, a3
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    rev8 a0, a0
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    rev8 a1, a1
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    sltu a2, a1, a0
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    sltu a0, a0, a1
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    sub a0, a2, a0
 ; CHECK-UNALIGNED-RV32-ZBB-NEXT:    ret
 ;
 ; CHECK-UNALIGNED-RV64-ZBB-LABEL: memcmp_size_3:
 ; CHECK-UNALIGNED-RV64-ZBB:       # %bb.0: # %entry
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lh a2, 0(a0)
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lh a3, 0(a1)
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    rev8 a2, a2
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    rev8 a3, a3
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    srli a2, a2, 48
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    srli a3, a3, 48
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    bne a2, a3, .LBB24_2
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:  # %bb.1: # %loadbb1
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lbu a0, 2(a0)
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lbu a1, 2(a1)
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    sub a0, a0, a1
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ret
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:  .LBB24_2: # %res_block
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    sltu a0, a2, a3
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    neg a0, a0
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ori a0, a0, 1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lbu a2, 2(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lhu a0, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lbu a3, 2(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lhu a1, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    slli a2, a2, 16
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    or a0, a0, a2
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    slli a3, a3, 16
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    or a1, a1, a3
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    rev8 a0, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    rev8 a1, a1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    srli a0, a0, 32
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    srli a1, a1, 32
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    sltu a2, a1, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    sltu a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    sub a0, a2, a0
 ; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ret
 ;
 ; CHECK-UNALIGNED-RV32-ZBKB-LABEL: memcmp_size_3:
 ; CHECK-UNALIGNED-RV32-ZBKB:       # %bb.0: # %entry
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lh a2, 0(a0)
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lh a3, 0(a1)
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    rev8 a2, a2
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    rev8 a3, a3
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    srli a2, a2, 16
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    srli a3, a3, 16
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    bne a2, a3, .LBB24_2
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT:  # %bb.1: # %loadbb1
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lhu a2, 0(a0)
 ; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lbu a0, 2(a0)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lhu a3, 0(a1)
 ; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lbu a1, 2(a1)
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    sub a0, a0, a1
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    ret
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT:  .LBB24_2: # %res_block
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    sltu a0, a2, a3
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    neg a0, a0
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    ori a0, a0, 1
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    pack a0, a2, a0
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    pack a1, a3, a1
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    rev8 a0, a0
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    rev8 a1, a1
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    sltu a2, a1, a0
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    sltu a0, a0, a1
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    sub a0, a2, a0
 ; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    ret
 ;
 ; CHECK-UNALIGNED-RV64-ZBKB-LABEL: memcmp_size_3:
 ; CHECK-UNALIGNED-RV64-ZBKB:       # %bb.0: # %entry
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lh a2, 0(a0)
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lh a3, 0(a1)
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    rev8 a2, a2
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    rev8 a3, a3
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    srli a2, a2, 48
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    srli a3, a3, 48
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    bne a2, a3, .LBB24_2
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:  # %bb.1: # %loadbb1
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lbu a0, 2(a0)
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lbu a1, 2(a1)
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    sub a0, a0, a1
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ret
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:  .LBB24_2: # %res_block
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    sltu a0, a2, a3
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    neg a0, a0
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ori a0, a0, 1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lbu a2, 2(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lhu a0, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lbu a3, 2(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lhu a1, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    slli a2, a2, 16
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    or a0, a0, a2
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    slli a3, a3, 16
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    or a1, a1, a3
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    rev8 a0, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    rev8 a1, a1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    srli a0, a0, 32
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    srli a1, a1, 32
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    sltu a2, a1, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    sltu a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    sub a0, a2, a0
 ; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ret
 ;
 ; CHECK-UNALIGNED-RV32-V-LABEL: memcmp_size_3:
@@ -2845,22 +2835,19 @@ define i32 @memcmp_size_5(ptr %s1, ptr %s2) nounwind optsize {
 ;
 ; CHECK-UNALIGNED-RV64-ZBB-LABEL: memcmp_size_5:
 ; CHECK-UNALIGNED-RV64-ZBB:       # %bb.0: # %entry
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lw a2, 0(a0)
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lw a3, 0(a1)
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    rev8 a2, a2
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    rev8 a3, a3
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    srli a2, a2, 32
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    srli a3, a3, 32
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    bne a2, a3, .LBB26_2
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:  # %bb.1: # %loadbb1
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lbu a0, 4(a0)
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lbu a1, 4(a1)
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    sub a0, a0, a1
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ret
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:  .LBB26_2: # %res_block
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    sltu a0, a2, a3
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    neg a0, a0
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ori a0, a0, 1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lbu a2, 4(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lwu a0, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lbu a3, 4(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lwu a1, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    slli a2, a2, 32
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    or a0, a0, a2
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    slli a3, a3, 32
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    or a1, a1, a3
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    rev8 a0, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    rev8 a1, a1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    sltu a2, a1, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    sltu a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    sub a0, a2, a0
 ; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ret
 ;
 ; CHECK-UNALIGNED-RV32-ZBKB-LABEL: memcmp_size_5:
@@ -2883,22 +2870,17 @@ define i32 @memcmp_size_5(ptr %s1, ptr %s2) nounwind optsize {
 ;
 ; CHECK-UNALIGNED-RV64-ZBKB-LABEL: memcmp_size_5:
 ; CHECK-UNALIGNED-RV64-ZBKB:       # %bb.0: # %entry
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lw a2, 0(a0)
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lw a3, 0(a1)
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    rev8 a2, a2
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    rev8 a3, a3
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    srli a2, a2, 32
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    srli a3, a3, 32
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    bne a2, a3, .LBB26_2
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:  # %bb.1: # %loadbb1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lwu a2, 0(a0)
 ; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lbu a0, 4(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lwu a3, 0(a1)
 ; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lbu a1, 4(a1)
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    sub a0, a0, a1
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ret
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:  .LBB26_2: # %res_block
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    sltu a0, a2, a3
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    neg a0, a0
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ori a0, a0, 1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    pack a0, a2, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    pack a1, a3, a1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    rev8 a0, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    rev8 a1, a1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    sltu a2, a1, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    sltu a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    sub a0, a2, a0
 ; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ret
 ;
 ; CHECK-UNALIGNED-RV32-V-LABEL: memcmp_size_5:
@@ -3052,28 +3034,19 @@ define i32 @memcmp_size_6(ptr %s1, ptr %s2) nounwind optsize {
 ;
 ; CHECK-UNALIGNED-RV64-ZBB-LABEL: memcmp_size_6:
 ; CHECK-UNALIGNED-RV64-ZBB:       # %bb.0: # %entry
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lw a2, 0(a0)
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lw a3, 0(a1)
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    rev8 a2, a2
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    rev8 a3, a3
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    srli a2, a2, 32
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    srli a3, a3, 32
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    bne a2, a3, .LBB27_3
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:  # %bb.1: # %loadbb1
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lh a0, 4(a0)
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lh a1, 4(a1)
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    rev8 a2, a0
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    rev8 a3, a1
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    srli a2, a2, 48
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    srli a3, a3, 48
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    bne a2, a3, .LBB27_3
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:  # %bb.2:
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    li a0, 0
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ret
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:  .LBB27_3: # %res_block
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    sltu a0, a2, a3
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    neg a0, a0
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ori a0, a0, 1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lhu a2, 4(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lwu a0, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lhu a3, 4(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lwu a1, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    slli a2, a2, 32
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    or a0, a0, a2
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    slli a3, a3, 32
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    or a1, a1, a3
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    rev8 a0, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    rev8 a1, a1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    sltu a2, a1, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    sltu a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    sub a0, a2, a0
 ; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ret
 ;
 ; CHECK-UNALIGNED-RV32-ZBKB-LABEL: memcmp_size_6:
@@ -3102,28 +3075,17 @@ define i32 @memcmp_size_6(ptr %s1, ptr %s2) nounwind optsize {
 ;
 ; CHECK-UNALIGNED-RV64-ZBKB-LABEL: memcmp_size_6:
 ; CHECK-UNALIGNED-RV64-ZBKB:       # %bb.0: # %entry
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lw a2, 0(a0)
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lw a3, 0(a1)
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    rev8 a2, a2
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    rev8 a3, a3
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    srli a2, a2, 32
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    srli a3, a3, 32
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    bne a2, a3, .LBB27_3
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:  # %bb.1: # %loadbb1
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lh a0, 4(a0)
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lh a1, 4(a1)
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    rev8 a2, a0
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    rev8 a3, a1
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    srli a2, a2, 48
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    srli a3, a3, 48
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    bne a2, a3, .LBB27_3
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:  # %bb.2:
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    li a0, 0
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ret
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:  .LBB27_3: # %res_block
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    sltu a0, a2, a3
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    neg a0, a0
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ori a0, a0, 1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lwu a2, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lhu a0, 4(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lwu a3, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lhu a1, 4(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    pack a0, a2, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    pack a1, a3, a1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    rev8 a0, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    rev8 a1, a1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    sltu a2, a1, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    sltu a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    sub a0, a2, a0
 ; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ret
 ;
 ; CHECK-UNALIGNED-RV32-V-LABEL: memcmp_size_6:
diff --git a/llvm/test/CodeGen/RISCV/memcmp.ll b/llvm/test/CodeGen/RISCV/memcmp.ll
index 860c3a94abc0a7..629a9298ee469d 100644
--- a/llvm/test/CodeGen/RISCV/memcmp.ll
+++ b/llvm/test/CodeGen/RISCV/memcmp.ll
@@ -3145,82 +3145,72 @@ define i32 @memcmp_size_3(ptr %s1, ptr %s2) nounwind {
 ;
 ; CHECK-UNALIGNED-RV32-ZBB-LABEL: memcmp_size_3:
 ; CHECK-UNALIGNED-RV32-ZBB:       # %bb.0: # %entry
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lh a2, 0(a0)
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lh a3, 0(a1)
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:    rev8 a2, a2
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:    rev8 a3, a3
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:    srli a2, a2, 16
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:    srli a3, a3, 16
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:    bne a2, a3, .LBB24_2
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:  # %bb.1: # %loadbb1
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lbu a0, 2(a0)
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lbu a1, 2(a1)
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:    sub a0, a0, a1
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:    ret
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:  .LBB24_2: # %res_block
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:    sltu a0, a2, a3
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:    neg a0, a0
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:    ori a0, a0, 1
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lbu a2, 2(a0)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lhu a0, 0(a0)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lbu a3, 2(a1)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lhu a1, 0(a1)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    slli a2, a2, 16
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    or a0, a0, a2
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    slli a3, a3, 16
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    or a1, a1, a3
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    rev8 a0, a0
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    rev8 a1, a1
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    sltu a2, a1, a0
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    sltu a0, a0, a1
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    sub a0, a2, a0
 ; CHECK-UNALIGNED-RV32-ZBB-NEXT:    ret
 ;
 ; CHECK-UNALIGNED-RV64-ZBB-LABEL: memcmp_size_3:
 ; CHECK-UNALIGNED-RV64-ZBB:       # %bb.0: # %entry
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lh a2, 0(a0)
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lh a3, 0(a1)
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    rev8 a2, a2
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    rev8 a3, a3
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    srli a2, a2, 48
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    srli a3, a3, 48
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    bne a2, a3, .LBB24_2
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:  # %bb.1: # %loadbb1
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lbu a0, 2(a0)
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lbu a1, 2(a1)
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    sub a0, a0, a1
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ret
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:  .LBB24_2: # %res_block
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    sltu a0, a2, a3
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    neg a0, a0
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ori a0, a0, 1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lbu a2, 2(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lhu a0, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lbu a3, 2(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lhu a1, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    slli a2, a2, 16
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    or a0, a0, a2
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    slli a3, a3, 16
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    or a1, a1, a3
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    rev8 a0, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    rev8 a1, a1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    srli a0, a0, 32
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    srli a1, a1, 32
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    sltu a2, a1, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    sltu a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    sub a0, a2, a0
 ; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ret
 ;
 ; CHECK-UNALIGNED-RV32-ZBKB-LABEL: memcmp_size_3:
 ; CHECK-UNALIGNED-RV32-ZBKB:       # %bb.0: # %entry
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lh a2, 0(a0)
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lh a3, 0(a1)
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    rev8 a2, a2
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    rev8 a3, a3
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    srli a2, a2, 16
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    srli a3, a3, 16
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    bne a2, a3, .LBB24_2
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT:  # %bb.1: # %loadbb1
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lhu a2, 0(a0)
 ; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lbu a0, 2(a0)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lhu a3, 0(a1)
 ; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lbu a1, 2(a1)
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    sub a0, a0, a1
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    ret
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT:  .LBB24_2: # %res_block
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    sltu a0, a2, a3
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    neg a0, a0
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    ori a0, a0, 1
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    pack a0, a2, a0
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    pack a1, a3, a1
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    rev8 a0, a0
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    rev8 a1, a1
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    sltu a2, a1, a0
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    sltu a0, a0, a1
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    sub a0, a2, a0
 ; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    ret
 ;
 ; CHECK-UNALIGNED-RV64-ZBKB-LABEL: memcmp_size_3:
 ; CHECK-UNALIGNED-RV64-ZBKB:       # %bb.0: # %entry
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lh a2, 0(a0)
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lh a3, 0(a1)
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    rev8 a2, a2
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    rev8 a3, a3
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    srli a2, a2, 48
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    srli a3, a3, 48
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    bne a2, a3, .LBB24_2
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:  # %bb.1: # %loadbb1
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lbu a0, 2(a0)
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lbu a1, 2(a1)
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    sub a0, a0, a1
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ret
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:  .LBB24_2: # %res_block
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    sltu a0, a2, a3
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    neg a0, a0
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ori a0, a0, 1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lbu a2, 2(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lhu a0, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lbu a3, 2(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lhu a1, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    slli a2, a2, 16
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    or a0, a0, a2
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    slli a3, a3, 16
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    or a1, a1, a3
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    rev8 a0, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    rev8 a1, a1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    srli a0, a0, 32
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    srli a1, a1, 32
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    sltu a2, a1, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    sltu a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    sub a0, a2, a0
 ; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ret
 ;
 ; CHECK-UNALIGNED-RV32-V-LABEL: memcmp_size_3:
@@ -3541,22 +3531,19 @@ define i32 @memcmp_size_5(ptr %s1, ptr %s2) nounwind {
 ;
 ; CHECK-UNALIGNED-RV64-ZBB-LABEL: memcmp_size_5:
 ; CHECK-UNALIGNED-RV64-ZBB:       # %bb.0: # %entry
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lw a2, 0(a0)
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lw a3, 0(a1)
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    rev8 a2, a2
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    rev8 a3, a3
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    srli a2, a2, 32
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    srli a3, a3, 32
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    bne a2, a3, .LBB26_2
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:  # %bb.1: # %loadbb1
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lbu a0, 4(a0)
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lbu a1, 4(a1)
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    sub a0, a0, a1
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ret
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:  .LBB26_2: # %res_block
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    sltu a0, a2, a3
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    neg a0, a0
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ori a0, a0, 1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lbu a2, 4(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lwu a0, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lbu a3, 4(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lwu a1, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    slli a2, a2, 32
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    or a0, a0, a2
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    slli a3, a3, 32
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    or a1, a1, a3
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    rev8 a0, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    rev8 a1, a1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    sltu a2, a1, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    sltu a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    sub a0, a2, a0
 ; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ret
 ;
 ; CHECK-UNALIGNED-RV32-ZBKB-LABEL: memcmp_size_5:
@@ -3579,22 +3566,17 @@ define i32 @memcmp_size_5(ptr %s1, ptr %s2) nounwind {
 ;
 ; CHECK-UNALIGNED-RV64-ZBKB-LABEL: memcmp_size_5:
 ; CHECK-UNALIGNED-RV64-ZBKB:       # %bb.0: # %entry
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lw a2, 0(a0)
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lw a3, 0(a1)
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    rev8 a2, a2
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    rev8 a3, a3
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    srli a2, a2, 32
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    srli a3, a3, 32
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    bne a2, a3, .LBB26_2
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:  # %bb.1: # %loadbb1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lwu a2, 0(a0)
 ; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lbu a0, 4(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lwu a3, 0(a1)
 ; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lbu a1, 4(a1)
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    sub a0, a0, a1
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ret
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:  .LBB26_2: # %res_block
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    sltu a0, a2, a3
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    neg a0, a0
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ori a0, a0, 1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    pack a0, a2, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    pack a1, a3, a1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    rev8 a0, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    rev8 a1, a1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    sltu a2, a1, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    sltu a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    sub a0, a2, a0
 ; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ret
 ;
 ; CHECK-UNALIGNED-RV32-V-LABEL: memcmp_size_5:
@@ -3748,28 +3730,19 @@ define i32 @memcmp_size_6(ptr %s1, ptr %s2) nounwind {
 ;
 ; CHECK-UNALIGNED-RV64-ZBB-LABEL: memcmp_size_6:
 ; CHECK-UNALIGNED-RV64-ZBB:       # %bb.0: # %entry
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lw a2, 0(a0)
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lw a3, 0(a1)
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    rev8 a2, a2
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    rev8 a3, a3
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    srli a2, a2, 32
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    srli a3, a3, 32
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    bne a2, a3, .LBB27_3
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:  # %bb.1: # %loadbb1
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lh a0, 4(a0)
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lh a1, 4(a1)
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    rev8 a2, a0
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    rev8 a3, a1
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    srli a2, a2, 48
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    srli a3, a3, 48
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    bne a2, a3, .LBB27_3
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:  # %bb.2:
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    li a0, 0
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ret
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:  .LBB27_3: # %res_block
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    sltu a0, a2, a3
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    neg a0, a0
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ori a0, a0, 1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lhu a2, 4(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lwu a0, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lhu a3, 4(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lwu a1, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    slli a2, a2, 32
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    or a0, a0, a2
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    slli a3, a3, 32
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    or a1, a1, a3
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    rev8 a0, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    rev8 a1, a1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    sltu a2, a1, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    sltu a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    sub a0, a2, a0
 ; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ret
 ;
 ; CHECK-UNALIGNED-RV32-ZBKB-LABEL: memcmp_size_6:
@@ -3798,28 +3771,17 @@ define i32 @memcmp_size_6(ptr %s1, ptr %s2) nounwind {
 ;
 ; CHECK-UNALIGNED-RV64-ZBKB-LABEL: memcmp_size_6:
 ; CHECK-UNALIGNED-RV64-ZBKB:       # %bb.0: # %entry
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lw a2, 0(a0)
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lw a3, 0(a1)
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    rev8 a2, a2
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    rev8 a3, a3
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    srli a2, a2, 32
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    srli a3, a3, 32
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    bne a2, a3, .LBB27_3
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:  # %bb.1: # %loadbb1
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lh a0, 4(a0)
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lh a1, 4(a1)
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    rev8 a2, a0
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    rev8 a3, a1
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    srli a2, a2, 48
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    srli a3, a3, 48
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    bne a2, a3, .LBB27_3
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:  # %bb.2:
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    li a0, 0
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ret
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:  .LBB27_3: # %res_block
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    sltu a0, a2, a3
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    neg a0, a0
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ori a0, a0, 1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lwu a2, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lhu a0, 4(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lwu a3, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lhu a1, 4(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    pack a0, a2, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    pack a1, a3, a1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    rev8 a0, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    rev8 a1, a1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    sltu a2, a1, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    sltu a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    sub a0, a2, a0
 ; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ret
 ;
 ; CHECK-UNALIGNED-RV32-V-LABEL: memcmp_size_6:



More information about the llvm-commits mailing list