[llvm-branch-commits] [llvm] [RISCV] Add initial support of memcmp expansion (PR #107548)

Pengcheng Wang via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Thu Oct 10 02:02:59 PDT 2024


================
@@ -1144,42 +2872,116 @@ entry:
 define i32 @memcmp_size_4(ptr %s1, ptr %s2) nounwind {
 ; CHECK-ALIGNED-RV32-LABEL: memcmp_size_4:
 ; CHECK-ALIGNED-RV32:       # %bb.0: # %entry
-; CHECK-ALIGNED-RV32-NEXT:    addi sp, sp, -16
-; CHECK-ALIGNED-RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; CHECK-ALIGNED-RV32-NEXT:    li a2, 4
-; CHECK-ALIGNED-RV32-NEXT:    call memcmp
-; CHECK-ALIGNED-RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; CHECK-ALIGNED-RV32-NEXT:    addi sp, sp, 16
+; CHECK-ALIGNED-RV32-NEXT:    lbu a2, 0(a0)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a3, 1(a0)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a4, 3(a0)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a0, 2(a0)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a5, 0(a1)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a6, 1(a1)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a7, 3(a1)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a1, 2(a1)
+; CHECK-ALIGNED-RV32-NEXT:    slli a0, a0, 8
+; CHECK-ALIGNED-RV32-NEXT:    or a0, a0, a4
+; CHECK-ALIGNED-RV32-NEXT:    slli a3, a3, 16
+; CHECK-ALIGNED-RV32-NEXT:    slli a2, a2, 24
+; CHECK-ALIGNED-RV32-NEXT:    or a2, a2, a3
+; CHECK-ALIGNED-RV32-NEXT:    or a0, a2, a0
+; CHECK-ALIGNED-RV32-NEXT:    slli a1, a1, 8
+; CHECK-ALIGNED-RV32-NEXT:    or a1, a1, a7
+; CHECK-ALIGNED-RV32-NEXT:    slli a6, a6, 16
+; CHECK-ALIGNED-RV32-NEXT:    slli a5, a5, 24
+; CHECK-ALIGNED-RV32-NEXT:    or a2, a5, a6
+; CHECK-ALIGNED-RV32-NEXT:    or a1, a2, a1
+; CHECK-ALIGNED-RV32-NEXT:    sltu a2, a1, a0
+; CHECK-ALIGNED-RV32-NEXT:    sltu a0, a0, a1
+; CHECK-ALIGNED-RV32-NEXT:    sub a0, a2, a0
 ; CHECK-ALIGNED-RV32-NEXT:    ret
 ;
 ; CHECK-ALIGNED-RV64-LABEL: memcmp_size_4:
 ; CHECK-ALIGNED-RV64:       # %bb.0: # %entry
-; CHECK-ALIGNED-RV64-NEXT:    addi sp, sp, -16
-; CHECK-ALIGNED-RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
-; CHECK-ALIGNED-RV64-NEXT:    li a2, 4
-; CHECK-ALIGNED-RV64-NEXT:    call memcmp
-; CHECK-ALIGNED-RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
-; CHECK-ALIGNED-RV64-NEXT:    addi sp, sp, 16
+; CHECK-ALIGNED-RV64-NEXT:    lbu a2, 0(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a3, 1(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a4, 2(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lb a0, 3(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a5, 0(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a6, 1(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a7, 2(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lb a1, 3(a1)
+; CHECK-ALIGNED-RV64-NEXT:    andi a0, a0, 255
+; CHECK-ALIGNED-RV64-NEXT:    slli a4, a4, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a0, a4, a0
+; CHECK-ALIGNED-RV64-NEXT:    slli a3, a3, 16
+; CHECK-ALIGNED-RV64-NEXT:    slliw a2, a2, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a2, a2, a3
+; CHECK-ALIGNED-RV64-NEXT:    or a0, a2, a0
+; CHECK-ALIGNED-RV64-NEXT:    andi a1, a1, 255
+; CHECK-ALIGNED-RV64-NEXT:    slli a7, a7, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a1, a7, a1
+; CHECK-ALIGNED-RV64-NEXT:    slli a6, a6, 16
+; CHECK-ALIGNED-RV64-NEXT:    slliw a2, a5, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a2, a2, a6
+; CHECK-ALIGNED-RV64-NEXT:    or a1, a2, a1
+; CHECK-ALIGNED-RV64-NEXT:    sltu a2, a1, a0
+; CHECK-ALIGNED-RV64-NEXT:    sltu a0, a0, a1
+; CHECK-ALIGNED-RV64-NEXT:    sub a0, a2, a0
 ; CHECK-ALIGNED-RV64-NEXT:    ret
 ;
 ; CHECK-UNALIGNED-RV32-LABEL: memcmp_size_4:
 ; CHECK-UNALIGNED-RV32:       # %bb.0: # %entry
-; CHECK-UNALIGNED-RV32-NEXT:    addi sp, sp, -16
-; CHECK-UNALIGNED-RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-NEXT:    li a2, 4
-; CHECK-UNALIGNED-RV32-NEXT:    call memcmp
-; CHECK-UNALIGNED-RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-NEXT:    addi sp, sp, 16
+; CHECK-UNALIGNED-RV32-NEXT:    lw a0, 0(a0)
----------------
wangpc-pp wrote:

Done.
It seems we can benefit not only from `rev8` instructions but also from `pack` instructions (for forming large integers).
But I don't think we should disable the expansion when Zbb/Zbkb are not available. If these extensions are not supported and we don't expand memcmp, we would still end up executing these instructions in glibc's memcmp, right?

https://github.com/llvm/llvm-project/pull/107548


More information about the llvm-branch-commits mailing list