[llvm] [RISCV] Allow tail memcmp expansion (PR #121460)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 2 01:07:49 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-risc-v
Author: Pengcheng Wang (wangpc-pp)
<details>
<summary>Changes</summary>
This optimization was introduced by #70469.
Like AArch64, we allow tail expansions for a tail size of 3 bytes on RV32
and tail sizes of 3/5/6 bytes on RV64.
This simplifies the comparison and reduces the number of basic blocks.
---
Patch is 28.93 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/121460.diff
3 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp (+5-2)
- (modified) llvm/test/CodeGen/RISCV/memcmp-optsize.ll (+98-136)
- (modified) llvm/test/CodeGen/RISCV/memcmp.ll (+98-136)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 0abb270edcabc8..50e4b5f378932a 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -2565,9 +2565,12 @@ RISCVTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
Options.AllowOverlappingLoads = true;
Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
Options.NumLoadsPerBlock = Options.MaxNumLoads;
- if (ST->is64Bit())
+ if (ST->is64Bit()) {
Options.LoadSizes = {8, 4, 2, 1};
- else
+ Options.AllowedTailExpansions = {3, 5, 6, 7};
+ } else {
Options.LoadSizes = {4, 2, 1};
+ Options.AllowedTailExpansions = {3};
+ }
return Options;
}
diff --git a/llvm/test/CodeGen/RISCV/memcmp-optsize.ll b/llvm/test/CodeGen/RISCV/memcmp-optsize.ll
index d529ae6ecd0aba..b9a27b9d0c9e70 100644
--- a/llvm/test/CodeGen/RISCV/memcmp-optsize.ll
+++ b/llvm/test/CodeGen/RISCV/memcmp-optsize.ll
@@ -2449,82 +2449,72 @@ define i32 @memcmp_size_3(ptr %s1, ptr %s2) nounwind optsize {
;
; CHECK-UNALIGNED-RV32-ZBB-LABEL: memcmp_size_3:
; CHECK-UNALIGNED-RV32-ZBB: # %bb.0: # %entry
-; CHECK-UNALIGNED-RV32-ZBB-NEXT: lh a2, 0(a0)
-; CHECK-UNALIGNED-RV32-ZBB-NEXT: lh a3, 0(a1)
-; CHECK-UNALIGNED-RV32-ZBB-NEXT: rev8 a2, a2
-; CHECK-UNALIGNED-RV32-ZBB-NEXT: rev8 a3, a3
-; CHECK-UNALIGNED-RV32-ZBB-NEXT: srli a2, a2, 16
-; CHECK-UNALIGNED-RV32-ZBB-NEXT: srli a3, a3, 16
-; CHECK-UNALIGNED-RV32-ZBB-NEXT: bne a2, a3, .LBB24_2
-; CHECK-UNALIGNED-RV32-ZBB-NEXT: # %bb.1: # %loadbb1
-; CHECK-UNALIGNED-RV32-ZBB-NEXT: lbu a0, 2(a0)
-; CHECK-UNALIGNED-RV32-ZBB-NEXT: lbu a1, 2(a1)
-; CHECK-UNALIGNED-RV32-ZBB-NEXT: sub a0, a0, a1
-; CHECK-UNALIGNED-RV32-ZBB-NEXT: ret
-; CHECK-UNALIGNED-RV32-ZBB-NEXT: .LBB24_2: # %res_block
-; CHECK-UNALIGNED-RV32-ZBB-NEXT: sltu a0, a2, a3
-; CHECK-UNALIGNED-RV32-ZBB-NEXT: neg a0, a0
-; CHECK-UNALIGNED-RV32-ZBB-NEXT: ori a0, a0, 1
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: lbu a2, 2(a0)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: lhu a0, 0(a0)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: lbu a3, 2(a1)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: lhu a1, 0(a1)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: slli a2, a2, 16
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: or a0, a0, a2
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: slli a3, a3, 16
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: or a1, a1, a3
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: rev8 a0, a0
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: rev8 a1, a1
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: sltu a2, a1, a0
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: sltu a0, a0, a1
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: sub a0, a2, a0
; CHECK-UNALIGNED-RV32-ZBB-NEXT: ret
;
; CHECK-UNALIGNED-RV64-ZBB-LABEL: memcmp_size_3:
; CHECK-UNALIGNED-RV64-ZBB: # %bb.0: # %entry
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: lh a2, 0(a0)
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: lh a3, 0(a1)
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: rev8 a2, a2
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: rev8 a3, a3
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: srli a2, a2, 48
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: srli a3, a3, 48
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: bne a2, a3, .LBB24_2
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: # %bb.1: # %loadbb1
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: lbu a0, 2(a0)
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: lbu a1, 2(a1)
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: sub a0, a0, a1
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: ret
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: .LBB24_2: # %res_block
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a0, a2, a3
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: neg a0, a0
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: ori a0, a0, 1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: lbu a2, 2(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: lhu a0, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: lbu a3, 2(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: lhu a1, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: slli a2, a2, 16
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: or a0, a0, a2
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: slli a3, a3, 16
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: or a1, a1, a3
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: rev8 a0, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: rev8 a1, a1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: srli a0, a0, 32
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: srli a1, a1, 32
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a2, a1, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: sub a0, a2, a0
; CHECK-UNALIGNED-RV64-ZBB-NEXT: ret
;
; CHECK-UNALIGNED-RV32-ZBKB-LABEL: memcmp_size_3:
; CHECK-UNALIGNED-RV32-ZBKB: # %bb.0: # %entry
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lh a2, 0(a0)
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lh a3, 0(a1)
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT: rev8 a2, a2
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT: rev8 a3, a3
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT: srli a2, a2, 16
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT: srli a3, a3, 16
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT: bne a2, a3, .LBB24_2
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT: # %bb.1: # %loadbb1
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lhu a2, 0(a0)
; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lbu a0, 2(a0)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lhu a3, 0(a1)
; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lbu a1, 2(a1)
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT: sub a0, a0, a1
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT: ret
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT: .LBB24_2: # %res_block
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT: sltu a0, a2, a3
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT: neg a0, a0
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT: ori a0, a0, 1
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT: pack a0, a2, a0
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT: pack a1, a3, a1
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT: rev8 a0, a0
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT: rev8 a1, a1
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT: sltu a2, a1, a0
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT: sltu a0, a0, a1
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT: sub a0, a2, a0
; CHECK-UNALIGNED-RV32-ZBKB-NEXT: ret
;
; CHECK-UNALIGNED-RV64-ZBKB-LABEL: memcmp_size_3:
; CHECK-UNALIGNED-RV64-ZBKB: # %bb.0: # %entry
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lh a2, 0(a0)
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lh a3, 0(a1)
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: rev8 a2, a2
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: rev8 a3, a3
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: srli a2, a2, 48
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: srli a3, a3, 48
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: bne a2, a3, .LBB24_2
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: # %bb.1: # %loadbb1
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lbu a0, 2(a0)
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lbu a1, 2(a1)
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sub a0, a0, a1
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: ret
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: .LBB24_2: # %res_block
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a0, a2, a3
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: neg a0, a0
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: ori a0, a0, 1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lbu a2, 2(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lhu a0, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lbu a3, 2(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lhu a1, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: slli a2, a2, 16
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: or a0, a0, a2
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: slli a3, a3, 16
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: or a1, a1, a3
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: rev8 a0, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: rev8 a1, a1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: srli a0, a0, 32
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: srli a1, a1, 32
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a2, a1, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sub a0, a2, a0
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: ret
;
; CHECK-UNALIGNED-RV32-V-LABEL: memcmp_size_3:
@@ -2845,22 +2835,19 @@ define i32 @memcmp_size_5(ptr %s1, ptr %s2) nounwind optsize {
;
; CHECK-UNALIGNED-RV64-ZBB-LABEL: memcmp_size_5:
; CHECK-UNALIGNED-RV64-ZBB: # %bb.0: # %entry
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: lw a2, 0(a0)
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: lw a3, 0(a1)
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: rev8 a2, a2
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: rev8 a3, a3
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: srli a2, a2, 32
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: srli a3, a3, 32
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: bne a2, a3, .LBB26_2
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: # %bb.1: # %loadbb1
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: lbu a0, 4(a0)
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: lbu a1, 4(a1)
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: sub a0, a0, a1
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: ret
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: .LBB26_2: # %res_block
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a0, a2, a3
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: neg a0, a0
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: ori a0, a0, 1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: lbu a2, 4(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: lwu a0, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: lbu a3, 4(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: lwu a1, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: slli a2, a2, 32
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: or a0, a0, a2
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: slli a3, a3, 32
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: or a1, a1, a3
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: rev8 a0, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: rev8 a1, a1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a2, a1, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: sub a0, a2, a0
; CHECK-UNALIGNED-RV64-ZBB-NEXT: ret
;
; CHECK-UNALIGNED-RV32-ZBKB-LABEL: memcmp_size_5:
@@ -2883,22 +2870,17 @@ define i32 @memcmp_size_5(ptr %s1, ptr %s2) nounwind optsize {
;
; CHECK-UNALIGNED-RV64-ZBKB-LABEL: memcmp_size_5:
; CHECK-UNALIGNED-RV64-ZBKB: # %bb.0: # %entry
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lw a2, 0(a0)
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lw a3, 0(a1)
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: rev8 a2, a2
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: rev8 a3, a3
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: srli a2, a2, 32
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: srli a3, a3, 32
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: bne a2, a3, .LBB26_2
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: # %bb.1: # %loadbb1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lwu a2, 0(a0)
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lbu a0, 4(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lwu a3, 0(a1)
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lbu a1, 4(a1)
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sub a0, a0, a1
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: ret
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: .LBB26_2: # %res_block
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a0, a2, a3
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: neg a0, a0
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: ori a0, a0, 1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: pack a0, a2, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: pack a1, a3, a1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: rev8 a0, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: rev8 a1, a1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a2, a1, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sub a0, a2, a0
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: ret
;
; CHECK-UNALIGNED-RV32-V-LABEL: memcmp_size_5:
@@ -3052,28 +3034,19 @@ define i32 @memcmp_size_6(ptr %s1, ptr %s2) nounwind optsize {
;
; CHECK-UNALIGNED-RV64-ZBB-LABEL: memcmp_size_6:
; CHECK-UNALIGNED-RV64-ZBB: # %bb.0: # %entry
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: lw a2, 0(a0)
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: lw a3, 0(a1)
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: rev8 a2, a2
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: rev8 a3, a3
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: srli a2, a2, 32
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: srli a3, a3, 32
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: bne a2, a3, .LBB27_3
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: # %bb.1: # %loadbb1
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: lh a0, 4(a0)
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: lh a1, 4(a1)
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: rev8 a2, a0
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: rev8 a3, a1
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: srli a2, a2, 48
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: srli a3, a3, 48
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: bne a2, a3, .LBB27_3
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: # %bb.2:
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: li a0, 0
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: ret
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: .LBB27_3: # %res_block
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a0, a2, a3
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: neg a0, a0
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: ori a0, a0, 1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: lhu a2, 4(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: lwu a0, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: lhu a3, 4(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: lwu a1, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: slli a2, a2, 32
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: or a0, a0, a2
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: slli a3, a3, 32
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: or a1, a1, a3
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: rev8 a0, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: rev8 a1, a1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a2, a1, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: sub a0, a2, a0
; CHECK-UNALIGNED-RV64-ZBB-NEXT: ret
;
; CHECK-UNALIGNED-RV32-ZBKB-LABEL: memcmp_size_6:
@@ -3102,28 +3075,17 @@ define i32 @memcmp_size_6(ptr %s1, ptr %s2) nounwind optsize {
;
; CHECK-UNALIGNED-RV64-ZBKB-LABEL: memcmp_size_6:
; CHECK-UNALIGNED-RV64-ZBKB: # %bb.0: # %entry
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lw a2, 0(a0)
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lw a3, 0(a1)
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: rev8 a2, a2
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: rev8 a3, a3
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: srli a2, a2, 32
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: srli a3, a3, 32
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: bne a2, a3, .LBB27_3
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: # %bb.1: # %loadbb1
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lh a0, 4(a0)
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lh a1, 4(a1)
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: rev8 a2, a0
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: rev8 a3, a1
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: srli a2, a2, 48
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: srli a3, a3, 48
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: bne a2, a3, .LBB27_3
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: # %bb.2:
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: li a0, 0
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: ret
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: .LBB27_3: # %res_block
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a0, a2, a3
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: neg a0, a0
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: ori a0, a0, 1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lwu a2, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lhu a0, 4(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lwu a3, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lhu a1, 4(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: pack a0, a2, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: pack a1, a3, a1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: rev8 a0, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: rev8 a1, a1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a2, a1, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sub a0, a2, a0
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: ret
;
; CHECK-UNALIGNED-RV32-V-LABEL: memcmp_size_6:
diff --git a/llvm/test/CodeGen/RISCV/memcmp.ll b/llvm/test/CodeGen/RISCV/memcmp.ll
index 860c3a94abc0a7..629a9298ee469d 100644
--- a/llvm/test/CodeGen/RISCV/memcmp.ll
+++ b/llvm/test/CodeGen/RISCV/memcmp.ll
@@ -3145,82 +3145,72 @@ define i32 @memcmp_size_3(ptr %s1, ptr %s2) nounwind {
;
; CHECK-UNALIGNED-RV32-ZBB-LABEL: memcmp_size_3:
; CHECK-UNALIGNED-RV32-ZBB: # %bb.0: # %entry
-; CHECK-UNALIGNED-RV32-ZBB-NEXT: lh a2, 0(a0)
-; CHECK-UNALIGNED-RV32-ZBB-NEXT: lh a3, 0(a1)
-; CHECK-UNALIGNED-RV32-ZBB-NEXT: rev8 a2, a2
-; CHECK-UNALIGNED-RV32-ZBB-NEXT: rev8 a3, a3
-; CHECK-UNALIGNED-RV32-ZBB-NEXT: srli a2, a2, 16
-; CHECK-UNALIGNED-RV32-ZBB-NEXT: srli a3, a3, 16
-; CHECK-UNALIGNED-RV32-ZBB-NEXT: bne a2, a3, .LBB24_2
-; CHECK-UNALIGNED-RV32-ZBB-NEXT: # %bb.1: # %loadbb1
-; CHECK-UNALIGNED-RV32-ZBB-NEXT: lbu a0, 2(a0)
-; CHECK-UNALIGNED-RV32-ZBB-NEXT: lbu a1, 2(a1)
-; CHECK-UNALIGNED-RV32-ZBB-NEXT: sub a0, a0, a1
-; CHECK-UNALIGNED-RV32-ZBB-NEXT: ret
-; CHECK-UNALIGNED-RV32-ZBB-NEXT: .LBB24_2: # %res_block
-; CHECK-UNALIGNED-RV32-ZBB-NEXT: sltu a0, a2, a3
-; CHECK-UNALIGNED-RV32-ZBB-NEXT: neg a0, a0
-; CHECK-UNALIGNED-RV32-ZBB-NEXT: ori a0, a0, 1
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: lbu a2, 2(a0)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: lhu a0, 0(a0)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: lbu a3, 2(a1)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: lhu a1, 0(a1)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: slli a2, a2, 16
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: or a0, a0, a2
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: slli a3, a3, 16
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: or a1, a1, a3
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: rev8 a0, a0
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: rev8 a1, a1
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: sltu a2, a1, a0
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: sltu a0, a0, a1
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: sub a0, a2, a0
; CHECK-UNALIGNED-RV32-ZBB-NEXT: ret
;
; CHECK-UNALIGNED-RV64-ZBB-LABEL: memcmp_size_3:
; CHECK-UNALIGNED-RV64-ZBB: # %bb.0: # %entry
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: lh a2, 0(a0)
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: lh a3, 0(a1)
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: rev8 a2, a2
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: rev8 a3, a3
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: srli a2, a2, 48
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: srli a3, a3, 48
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: bne a2, a3, .LBB24_2
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: # %bb.1: # %loadbb1
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: lbu a0, 2(a0)
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: lbu a1, 2(a1)
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: sub a0, a0, a1
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: ret
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: .LBB24_2: # %res_block
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a0, a2, a3
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: neg a0, a0
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: ori a0, a0, 1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: lbu a2, 2(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: lhu a0, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: lbu a3, 2(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: lhu a1, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: slli a2, a2, 16
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: or a0, a0, a2
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: slli a3, a3, 16
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: or a1, a1, a3
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: rev8 a0, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: rev8 a1, a1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: srli a0, a0, 32
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: srli a1, a1, 32
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a2, a1, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: sub a0, a2, a0
; CHECK-UNALIGNED-RV64-ZBB-NEXT: ret
;
; CHECK-UNALIGNED-RV32-ZBKB-LABEL: memcmp_size_3:
; CHECK-UNALIGNED-RV32-ZBKB: # %bb.0: # %entry
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lh a2, 0(a0)
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lh a3, 0(a1)
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT: rev8 a2, a2
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT: rev8 a3, a3
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT: srli a2, a2, 16
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT: srli a3, a3, 16
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT: bne a2, a3, .LBB24_2
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT: # %bb.1: # %loadbb1
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lhu a2, 0(a0)
; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lbu a0, 2(a0)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lhu a3, 0(a1)
; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lbu a1, 2(a1)
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT: sub a0, a0, a1
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT: ret
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT: .LBB24_2: # %res_block
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT: sltu a0, a2, a3
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT: neg a0, a0
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT: ori a0, a0, 1
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT: pack a0, a2, a0
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT: pack a1, a3, a1
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT: rev8 a0, a0
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT: rev8 a1, a1
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT: sltu a2, a1, a0
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT: sltu a0, a0, a1
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT: sub a0, a2, a0
; CHECK-UNALIGNED-RV32-ZBKB-NEXT: ret
;
; CHECK-UNALIGNED-RV64-ZBKB-LABEL: memcmp_size_3:
; CHECK-UNALIGNED-RV64-ZBKB: # %bb.0: # %entry
-;...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/121460
More information about the llvm-commits
mailing list