[llvm-branch-commits] [llvm] [RISCV] Add initial support of memcmp expansion (PR #107548)
Pengcheng Wang via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Nov 1 00:44:10 PDT 2024
================
@@ -1144,42 +2872,116 @@ entry:
define i32 @memcmp_size_4(ptr %s1, ptr %s2) nounwind {
; CHECK-ALIGNED-RV32-LABEL: memcmp_size_4:
; CHECK-ALIGNED-RV32: # %bb.0: # %entry
-; CHECK-ALIGNED-RV32-NEXT: addi sp, sp, -16
-; CHECK-ALIGNED-RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; CHECK-ALIGNED-RV32-NEXT: li a2, 4
-; CHECK-ALIGNED-RV32-NEXT: call memcmp
-; CHECK-ALIGNED-RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; CHECK-ALIGNED-RV32-NEXT: addi sp, sp, 16
+; CHECK-ALIGNED-RV32-NEXT: lbu a2, 0(a0)
+; CHECK-ALIGNED-RV32-NEXT: lbu a3, 1(a0)
+; CHECK-ALIGNED-RV32-NEXT: lbu a4, 3(a0)
+; CHECK-ALIGNED-RV32-NEXT: lbu a0, 2(a0)
+; CHECK-ALIGNED-RV32-NEXT: lbu a5, 0(a1)
+; CHECK-ALIGNED-RV32-NEXT: lbu a6, 1(a1)
+; CHECK-ALIGNED-RV32-NEXT: lbu a7, 3(a1)
+; CHECK-ALIGNED-RV32-NEXT: lbu a1, 2(a1)
+; CHECK-ALIGNED-RV32-NEXT: slli a0, a0, 8
+; CHECK-ALIGNED-RV32-NEXT: or a0, a0, a4
+; CHECK-ALIGNED-RV32-NEXT: slli a3, a3, 16
+; CHECK-ALIGNED-RV32-NEXT: slli a2, a2, 24
+; CHECK-ALIGNED-RV32-NEXT: or a2, a2, a3
+; CHECK-ALIGNED-RV32-NEXT: or a0, a2, a0
+; CHECK-ALIGNED-RV32-NEXT: slli a1, a1, 8
+; CHECK-ALIGNED-RV32-NEXT: or a1, a1, a7
+; CHECK-ALIGNED-RV32-NEXT: slli a6, a6, 16
+; CHECK-ALIGNED-RV32-NEXT: slli a5, a5, 24
+; CHECK-ALIGNED-RV32-NEXT: or a2, a5, a6
+; CHECK-ALIGNED-RV32-NEXT: or a1, a2, a1
+; CHECK-ALIGNED-RV32-NEXT: sltu a2, a1, a0
+; CHECK-ALIGNED-RV32-NEXT: sltu a0, a0, a1
+; CHECK-ALIGNED-RV32-NEXT: sub a0, a2, a0
; CHECK-ALIGNED-RV32-NEXT: ret
;
; CHECK-ALIGNED-RV64-LABEL: memcmp_size_4:
; CHECK-ALIGNED-RV64: # %bb.0: # %entry
-; CHECK-ALIGNED-RV64-NEXT: addi sp, sp, -16
-; CHECK-ALIGNED-RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; CHECK-ALIGNED-RV64-NEXT: li a2, 4
-; CHECK-ALIGNED-RV64-NEXT: call memcmp
-; CHECK-ALIGNED-RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; CHECK-ALIGNED-RV64-NEXT: addi sp, sp, 16
+; CHECK-ALIGNED-RV64-NEXT: lbu a2, 0(a0)
+; CHECK-ALIGNED-RV64-NEXT: lbu a3, 1(a0)
+; CHECK-ALIGNED-RV64-NEXT: lbu a4, 2(a0)
+; CHECK-ALIGNED-RV64-NEXT: lb a0, 3(a0)
+; CHECK-ALIGNED-RV64-NEXT: lbu a5, 0(a1)
+; CHECK-ALIGNED-RV64-NEXT: lbu a6, 1(a1)
+; CHECK-ALIGNED-RV64-NEXT: lbu a7, 2(a1)
+; CHECK-ALIGNED-RV64-NEXT: lb a1, 3(a1)
+; CHECK-ALIGNED-RV64-NEXT: andi a0, a0, 255
+; CHECK-ALIGNED-RV64-NEXT: slli a4, a4, 8
+; CHECK-ALIGNED-RV64-NEXT: or a0, a4, a0
+; CHECK-ALIGNED-RV64-NEXT: slli a3, a3, 16
+; CHECK-ALIGNED-RV64-NEXT: slliw a2, a2, 24
+; CHECK-ALIGNED-RV64-NEXT: or a2, a2, a3
+; CHECK-ALIGNED-RV64-NEXT: or a0, a2, a0
+; CHECK-ALIGNED-RV64-NEXT: andi a1, a1, 255
+; CHECK-ALIGNED-RV64-NEXT: slli a7, a7, 8
+; CHECK-ALIGNED-RV64-NEXT: or a1, a7, a1
+; CHECK-ALIGNED-RV64-NEXT: slli a6, a6, 16
+; CHECK-ALIGNED-RV64-NEXT: slliw a2, a5, 24
+; CHECK-ALIGNED-RV64-NEXT: or a2, a2, a6
+; CHECK-ALIGNED-RV64-NEXT: or a1, a2, a1
+; CHECK-ALIGNED-RV64-NEXT: sltu a2, a1, a0
+; CHECK-ALIGNED-RV64-NEXT: sltu a0, a0, a1
+; CHECK-ALIGNED-RV64-NEXT: sub a0, a2, a0
; CHECK-ALIGNED-RV64-NEXT: ret
;
; CHECK-UNALIGNED-RV32-LABEL: memcmp_size_4:
; CHECK-UNALIGNED-RV32: # %bb.0: # %entry
-; CHECK-UNALIGNED-RV32-NEXT: addi sp, sp, -16
-; CHECK-UNALIGNED-RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-NEXT: li a2, 4
-; CHECK-UNALIGNED-RV32-NEXT: call memcmp
-; CHECK-UNALIGNED-RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-NEXT: addi sp, sp, 16
+; CHECK-UNALIGNED-RV32-NEXT: lw a0, 0(a0)
----------------
wangpc-pp wrote:
Here is the code of memcmp copied from glibc: https://godbolt.org/z/4KxPTE6q1
There are many cases (which means many branches) in this general implementation; at the very least, we can benefit from unrolling and from the removal of those branches.
https://github.com/llvm/llvm-project/pull/107548
More information about the llvm-branch-commits
mailing list