[llvm] [RISCV] Allow tail memcmp expansion (PR #121460)

via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 2 01:07:49 PST 2025


llvmbot wrote:



@llvm/pr-subscribers-backend-risc-v

Author: Pengcheng Wang (wangpc-pp)

Changes:

This optimization was introduced by #70469.

Like AArch64, we allow tail expansions of 3 bytes on RV32 and of 3, 5, 6, and 7 bytes on RV64, matching the `AllowedTailExpansions` sets added below.

This simplifies the final comparison and reduces the number of basic blocks.
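
For illustration, here is a rough C++ sketch (not LLVM code; the helper names are made up) of what the expanded `memcmp(s1, s2, 3)` computes on RV32 with fast unaligned access. It mirrors the lhu/lbu + slli/or + rev8 sequence in the checks below, and the branchless `(x > y) - (x < y)` sign computation is what replaces the old `%res_block` branch:

```cpp
#include <cstdint>
#include <cstring>

// Illustrative only: load a 3-byte chunk and widen it so the bytes compare
// in memcmp (lexicographic, i.e. big-endian) order, like the lhu/lbu +
// slli/or + rev8 sequence in the RV32 checks below.
static uint32_t load3_be(const unsigned char *p) {
  uint16_t lo16;
  std::memcpy(&lo16, p, 2);                    // lhu: bytes 0-1
  uint32_t v = lo16 | (uint32_t(p[2]) << 16);  // lbu + slli + or: byte 2
  return __builtin_bswap32(v);                 // rev8 (GCC/Clang builtin)
}

// Branchless result, matching the sltu/sltu/sub tail: the sign of x - y.
int memcmp3_expanded(const void *s1, const void *s2) {
  uint32_t x = load3_be(static_cast<const unsigned char *>(s1));
  uint32_t y = load3_be(static_cast<const unsigned char *>(s2));
  return (x > y) - (x < y);
}
```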


---

Patch is 28.93 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/121460.diff


3 Files Affected:

- (modified) llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp (+5-2) 
- (modified) llvm/test/CodeGen/RISCV/memcmp-optsize.ll (+98-136) 
- (modified) llvm/test/CodeGen/RISCV/memcmp.ll (+98-136) 


``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 0abb270edcabc8..50e4b5f378932a 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -2565,9 +2565,12 @@ RISCVTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
   Options.AllowOverlappingLoads = true;
   Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
   Options.NumLoadsPerBlock = Options.MaxNumLoads;
-  if (ST->is64Bit())
+  if (ST->is64Bit()) {
     Options.LoadSizes = {8, 4, 2, 1};
-  else
+    Options.AllowedTailExpansions = {3, 5, 6, 7};
+  } else {
     Options.LoadSizes = {4, 2, 1};
+    Options.AllowedTailExpansions = {3};
+  }
   return Options;
 }
diff --git a/llvm/test/CodeGen/RISCV/memcmp-optsize.ll b/llvm/test/CodeGen/RISCV/memcmp-optsize.ll
index d529ae6ecd0aba..b9a27b9d0c9e70 100644
--- a/llvm/test/CodeGen/RISCV/memcmp-optsize.ll
+++ b/llvm/test/CodeGen/RISCV/memcmp-optsize.ll
@@ -2449,82 +2449,72 @@ define i32 @memcmp_size_3(ptr %s1, ptr %s2) nounwind optsize {
 ;
 ; CHECK-UNALIGNED-RV32-ZBB-LABEL: memcmp_size_3:
 ; CHECK-UNALIGNED-RV32-ZBB:       # %bb.0: # %entry
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lh a2, 0(a0)
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lh a3, 0(a1)
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:    rev8 a2, a2
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:    rev8 a3, a3
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:    srli a2, a2, 16
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:    srli a3, a3, 16
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:    bne a2, a3, .LBB24_2
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:  # %bb.1: # %loadbb1
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lbu a0, 2(a0)
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lbu a1, 2(a1)
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:    sub a0, a0, a1
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:    ret
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:  .LBB24_2: # %res_block
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:    sltu a0, a2, a3
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:    neg a0, a0
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:    ori a0, a0, 1
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lbu a2, 2(a0)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lhu a0, 0(a0)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lbu a3, 2(a1)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lhu a1, 0(a1)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    slli a2, a2, 16
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    or a0, a0, a2
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    slli a3, a3, 16
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    or a1, a1, a3
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    rev8 a0, a0
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    rev8 a1, a1
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    sltu a2, a1, a0
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    sltu a0, a0, a1
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    sub a0, a2, a0
 ; CHECK-UNALIGNED-RV32-ZBB-NEXT:    ret
 ;
 ; CHECK-UNALIGNED-RV64-ZBB-LABEL: memcmp_size_3:
 ; CHECK-UNALIGNED-RV64-ZBB:       # %bb.0: # %entry
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lh a2, 0(a0)
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lh a3, 0(a1)
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    rev8 a2, a2
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    rev8 a3, a3
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    srli a2, a2, 48
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    srli a3, a3, 48
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    bne a2, a3, .LBB24_2
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:  # %bb.1: # %loadbb1
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lbu a0, 2(a0)
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lbu a1, 2(a1)
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    sub a0, a0, a1
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ret
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:  .LBB24_2: # %res_block
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    sltu a0, a2, a3
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    neg a0, a0
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ori a0, a0, 1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lbu a2, 2(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lhu a0, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lbu a3, 2(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lhu a1, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    slli a2, a2, 16
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    or a0, a0, a2
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    slli a3, a3, 16
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    or a1, a1, a3
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    rev8 a0, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    rev8 a1, a1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    srli a0, a0, 32
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    srli a1, a1, 32
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    sltu a2, a1, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    sltu a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    sub a0, a2, a0
 ; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ret
 ;
 ; CHECK-UNALIGNED-RV32-ZBKB-LABEL: memcmp_size_3:
 ; CHECK-UNALIGNED-RV32-ZBKB:       # %bb.0: # %entry
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lh a2, 0(a0)
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lh a3, 0(a1)
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    rev8 a2, a2
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    rev8 a3, a3
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    srli a2, a2, 16
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    srli a3, a3, 16
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    bne a2, a3, .LBB24_2
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT:  # %bb.1: # %loadbb1
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lhu a2, 0(a0)
 ; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lbu a0, 2(a0)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lhu a3, 0(a1)
 ; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lbu a1, 2(a1)
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    sub a0, a0, a1
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    ret
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT:  .LBB24_2: # %res_block
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    sltu a0, a2, a3
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    neg a0, a0
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    ori a0, a0, 1
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    pack a0, a2, a0
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    pack a1, a3, a1
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    rev8 a0, a0
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    rev8 a1, a1
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    sltu a2, a1, a0
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    sltu a0, a0, a1
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    sub a0, a2, a0
 ; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    ret
 ;
 ; CHECK-UNALIGNED-RV64-ZBKB-LABEL: memcmp_size_3:
 ; CHECK-UNALIGNED-RV64-ZBKB:       # %bb.0: # %entry
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lh a2, 0(a0)
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lh a3, 0(a1)
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    rev8 a2, a2
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    rev8 a3, a3
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    srli a2, a2, 48
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    srli a3, a3, 48
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    bne a2, a3, .LBB24_2
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:  # %bb.1: # %loadbb1
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lbu a0, 2(a0)
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lbu a1, 2(a1)
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    sub a0, a0, a1
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ret
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:  .LBB24_2: # %res_block
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    sltu a0, a2, a3
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    neg a0, a0
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ori a0, a0, 1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lbu a2, 2(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lhu a0, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lbu a3, 2(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lhu a1, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    slli a2, a2, 16
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    or a0, a0, a2
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    slli a3, a3, 16
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    or a1, a1, a3
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    rev8 a0, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    rev8 a1, a1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    srli a0, a0, 32
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    srli a1, a1, 32
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    sltu a2, a1, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    sltu a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    sub a0, a2, a0
 ; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ret
 ;
 ; CHECK-UNALIGNED-RV32-V-LABEL: memcmp_size_3:
@@ -2845,22 +2835,19 @@ define i32 @memcmp_size_5(ptr %s1, ptr %s2) nounwind optsize {
 ;
 ; CHECK-UNALIGNED-RV64-ZBB-LABEL: memcmp_size_5:
 ; CHECK-UNALIGNED-RV64-ZBB:       # %bb.0: # %entry
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lw a2, 0(a0)
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lw a3, 0(a1)
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    rev8 a2, a2
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    rev8 a3, a3
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    srli a2, a2, 32
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    srli a3, a3, 32
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    bne a2, a3, .LBB26_2
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:  # %bb.1: # %loadbb1
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lbu a0, 4(a0)
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lbu a1, 4(a1)
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    sub a0, a0, a1
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ret
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:  .LBB26_2: # %res_block
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    sltu a0, a2, a3
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    neg a0, a0
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ori a0, a0, 1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lbu a2, 4(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lwu a0, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lbu a3, 4(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lwu a1, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    slli a2, a2, 32
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    or a0, a0, a2
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    slli a3, a3, 32
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    or a1, a1, a3
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    rev8 a0, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    rev8 a1, a1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    sltu a2, a1, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    sltu a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    sub a0, a2, a0
 ; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ret
 ;
 ; CHECK-UNALIGNED-RV32-ZBKB-LABEL: memcmp_size_5:
@@ -2883,22 +2870,17 @@ define i32 @memcmp_size_5(ptr %s1, ptr %s2) nounwind optsize {
 ;
 ; CHECK-UNALIGNED-RV64-ZBKB-LABEL: memcmp_size_5:
 ; CHECK-UNALIGNED-RV64-ZBKB:       # %bb.0: # %entry
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lw a2, 0(a0)
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lw a3, 0(a1)
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    rev8 a2, a2
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    rev8 a3, a3
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    srli a2, a2, 32
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    srli a3, a3, 32
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    bne a2, a3, .LBB26_2
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:  # %bb.1: # %loadbb1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lwu a2, 0(a0)
 ; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lbu a0, 4(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lwu a3, 0(a1)
 ; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lbu a1, 4(a1)
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    sub a0, a0, a1
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ret
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:  .LBB26_2: # %res_block
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    sltu a0, a2, a3
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    neg a0, a0
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ori a0, a0, 1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    pack a0, a2, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    pack a1, a3, a1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    rev8 a0, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    rev8 a1, a1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    sltu a2, a1, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    sltu a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    sub a0, a2, a0
 ; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ret
 ;
 ; CHECK-UNALIGNED-RV32-V-LABEL: memcmp_size_5:
@@ -3052,28 +3034,19 @@ define i32 @memcmp_size_6(ptr %s1, ptr %s2) nounwind optsize {
 ;
 ; CHECK-UNALIGNED-RV64-ZBB-LABEL: memcmp_size_6:
 ; CHECK-UNALIGNED-RV64-ZBB:       # %bb.0: # %entry
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lw a2, 0(a0)
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lw a3, 0(a1)
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    rev8 a2, a2
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    rev8 a3, a3
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    srli a2, a2, 32
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    srli a3, a3, 32
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    bne a2, a3, .LBB27_3
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:  # %bb.1: # %loadbb1
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lh a0, 4(a0)
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lh a1, 4(a1)
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    rev8 a2, a0
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    rev8 a3, a1
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    srli a2, a2, 48
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    srli a3, a3, 48
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    bne a2, a3, .LBB27_3
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:  # %bb.2:
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    li a0, 0
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ret
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:  .LBB27_3: # %res_block
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    sltu a0, a2, a3
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    neg a0, a0
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ori a0, a0, 1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lhu a2, 4(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lwu a0, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lhu a3, 4(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lwu a1, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    slli a2, a2, 32
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    or a0, a0, a2
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    slli a3, a3, 32
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    or a1, a1, a3
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    rev8 a0, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    rev8 a1, a1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    sltu a2, a1, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    sltu a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    sub a0, a2, a0
 ; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ret
 ;
 ; CHECK-UNALIGNED-RV32-ZBKB-LABEL: memcmp_size_6:
@@ -3102,28 +3075,17 @@ define i32 @memcmp_size_6(ptr %s1, ptr %s2) nounwind optsize {
 ;
 ; CHECK-UNALIGNED-RV64-ZBKB-LABEL: memcmp_size_6:
 ; CHECK-UNALIGNED-RV64-ZBKB:       # %bb.0: # %entry
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lw a2, 0(a0)
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lw a3, 0(a1)
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    rev8 a2, a2
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    rev8 a3, a3
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    srli a2, a2, 32
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    srli a3, a3, 32
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    bne a2, a3, .LBB27_3
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:  # %bb.1: # %loadbb1
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lh a0, 4(a0)
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lh a1, 4(a1)
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    rev8 a2, a0
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    rev8 a3, a1
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    srli a2, a2, 48
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    srli a3, a3, 48
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    bne a2, a3, .LBB27_3
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:  # %bb.2:
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    li a0, 0
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ret
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:  .LBB27_3: # %res_block
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    sltu a0, a2, a3
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    neg a0, a0
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ori a0, a0, 1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lwu a2, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lhu a0, 4(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lwu a3, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lhu a1, 4(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    pack a0, a2, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    pack a1, a3, a1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    rev8 a0, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    rev8 a1, a1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    sltu a2, a1, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    sltu a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    sub a0, a2, a0
 ; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ret
 ;
 ; CHECK-UNALIGNED-RV32-V-LABEL: memcmp_size_6:
diff --git a/llvm/test/CodeGen/RISCV/memcmp.ll b/llvm/test/CodeGen/RISCV/memcmp.ll
index 860c3a94abc0a7..629a9298ee469d 100644
--- a/llvm/test/CodeGen/RISCV/memcmp.ll
+++ b/llvm/test/CodeGen/RISCV/memcmp.ll
@@ -3145,82 +3145,72 @@ define i32 @memcmp_size_3(ptr %s1, ptr %s2) nounwind {
 ;
 ; CHECK-UNALIGNED-RV32-ZBB-LABEL: memcmp_size_3:
 ; CHECK-UNALIGNED-RV32-ZBB:       # %bb.0: # %entry
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lh a2, 0(a0)
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lh a3, 0(a1)
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:    rev8 a2, a2
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:    rev8 a3, a3
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:    srli a2, a2, 16
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:    srli a3, a3, 16
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:    bne a2, a3, .LBB24_2
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:  # %bb.1: # %loadbb1
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lbu a0, 2(a0)
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lbu a1, 2(a1)
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:    sub a0, a0, a1
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:    ret
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:  .LBB24_2: # %res_block
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:    sltu a0, a2, a3
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:    neg a0, a0
-; CHECK-UNALIGNED-RV32-ZBB-NEXT:    ori a0, a0, 1
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lbu a2, 2(a0)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lhu a0, 0(a0)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lbu a3, 2(a1)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lhu a1, 0(a1)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    slli a2, a2, 16
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    or a0, a0, a2
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    slli a3, a3, 16
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    or a1, a1, a3
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    rev8 a0, a0
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    rev8 a1, a1
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    sltu a2, a1, a0
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    sltu a0, a0, a1
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    sub a0, a2, a0
 ; CHECK-UNALIGNED-RV32-ZBB-NEXT:    ret
 ;
 ; CHECK-UNALIGNED-RV64-ZBB-LABEL: memcmp_size_3:
 ; CHECK-UNALIGNED-RV64-ZBB:       # %bb.0: # %entry
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lh a2, 0(a0)
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lh a3, 0(a1)
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    rev8 a2, a2
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    rev8 a3, a3
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    srli a2, a2, 48
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    srli a3, a3, 48
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    bne a2, a3, .LBB24_2
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:  # %bb.1: # %loadbb1
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lbu a0, 2(a0)
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lbu a1, 2(a1)
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    sub a0, a0, a1
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ret
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:  .LBB24_2: # %res_block
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    sltu a0, a2, a3
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    neg a0, a0
-; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ori a0, a0, 1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lbu a2, 2(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lhu a0, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lbu a3, 2(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lhu a1, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    slli a2, a2, 16
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    or a0, a0, a2
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    slli a3, a3, 16
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    or a1, a1, a3
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    rev8 a0, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    rev8 a1, a1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    srli a0, a0, 32
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    srli a1, a1, 32
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    sltu a2, a1, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    sltu a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    sub a0, a2, a0
 ; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ret
 ;
 ; CHECK-UNALIGNED-RV32-ZBKB-LABEL: memcmp_size_3:
 ; CHECK-UNALIGNED-RV32-ZBKB:       # %bb.0: # %entry
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lh a2, 0(a0)
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lh a3, 0(a1)
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    rev8 a2, a2
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    rev8 a3, a3
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    srli a2, a2, 16
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    srli a3, a3, 16
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    bne a2, a3, .LBB24_2
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT:  # %bb.1: # %loadbb1
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lhu a2, 0(a0)
 ; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lbu a0, 2(a0)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lhu a3, 0(a1)
 ; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lbu a1, 2(a1)
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    sub a0, a0, a1
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    ret
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT:  .LBB24_2: # %res_block
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    sltu a0, a2, a3
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    neg a0, a0
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    ori a0, a0, 1
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    pack a0, a2, a0
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    pack a1, a3, a1
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    rev8 a0, a0
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    rev8 a1, a1
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    sltu a2, a1, a0
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    sltu a0, a0, a1
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    sub a0, a2, a0
 ; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    ret
 ;
 ; CHECK-UNALIGNED-RV64-ZBKB-LABEL: memcmp_size_3:
 ; CHECK-UNALIGNED-RV64-ZBKB:       # %bb.0: # %entry
-;...
[truncated]

``````````



https://github.com/llvm/llvm-project/pull/121460

