[llvm-branch-commits] [llvm] [RISCV] Add initial support of memcmp expansion (PR #107548)

Pengcheng Wang via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Mon Nov 4 01:16:14 PST 2024


================
@@ -315,967 +3233,10985 @@ define i32 @bcmp_size_31(ptr %s1, ptr %s2) nounwind optsize {
 ; CHECK-RV32:       # %bb.0: # %entry
 ; CHECK-RV32-NEXT:    addi sp, sp, -16
 ; CHECK-RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; CHECK-RV32-NEXT:    li a2, 31
+; CHECK-RV32-NEXT:    li a2, 31
+; CHECK-RV32-NEXT:    call bcmp
+; CHECK-RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    addi sp, sp, 16
+; CHECK-RV32-NEXT:    ret
+;
+; CHECK-ALIGNED-RV64-LABEL: bcmp_size_31:
+; CHECK-ALIGNED-RV64:       # %bb.0: # %entry
+; CHECK-ALIGNED-RV64-NEXT:    addi sp, sp, -16
+; CHECK-ALIGNED-RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-ALIGNED-RV64-NEXT:    li a2, 31
+; CHECK-ALIGNED-RV64-NEXT:    call bcmp
+; CHECK-ALIGNED-RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-ALIGNED-RV64-NEXT:    addi sp, sp, 16
+; CHECK-ALIGNED-RV64-NEXT:    ret
+;
+; CHECK-ALIGNED-RV64-ZBB-LABEL: bcmp_size_31:
+; CHECK-ALIGNED-RV64-ZBB:       # %bb.0: # %entry
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    addi sp, sp, -16
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    li a2, 31
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    call bcmp
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    addi sp, sp, 16
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    ret
+;
+; CHECK-ALIGNED-RV64-ZBKB-LABEL: bcmp_size_31:
+; CHECK-ALIGNED-RV64-ZBKB:       # %bb.0: # %entry
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    addi sp, sp, -16
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    li a2, 31
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    call bcmp
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    addi sp, sp, 16
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    ret
+;
+; CHECK-ALIGNED-RV64-V-LABEL: bcmp_size_31:
+; CHECK-ALIGNED-RV64-V:       # %bb.0: # %entry
+; CHECK-ALIGNED-RV64-V-NEXT:    addi sp, sp, -16
+; CHECK-ALIGNED-RV64-V-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-ALIGNED-RV64-V-NEXT:    li a2, 31
+; CHECK-ALIGNED-RV64-V-NEXT:    call bcmp
+; CHECK-ALIGNED-RV64-V-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-ALIGNED-RV64-V-NEXT:    addi sp, sp, 16
+; CHECK-ALIGNED-RV64-V-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-LABEL: bcmp_size_31:
+; CHECK-UNALIGNED-RV64:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-NEXT:    ld a2, 0(a0)
+; CHECK-UNALIGNED-RV64-NEXT:    ld a3, 8(a0)
+; CHECK-UNALIGNED-RV64-NEXT:    ld a4, 16(a0)
+; CHECK-UNALIGNED-RV64-NEXT:    ld a0, 23(a0)
+; CHECK-UNALIGNED-RV64-NEXT:    ld a5, 0(a1)
+; CHECK-UNALIGNED-RV64-NEXT:    ld a6, 8(a1)
+; CHECK-UNALIGNED-RV64-NEXT:    ld a7, 16(a1)
+; CHECK-UNALIGNED-RV64-NEXT:    ld a1, 23(a1)
+; CHECK-UNALIGNED-RV64-NEXT:    xor a2, a2, a5
+; CHECK-UNALIGNED-RV64-NEXT:    xor a3, a3, a6
+; CHECK-UNALIGNED-RV64-NEXT:    xor a4, a4, a7
+; CHECK-UNALIGNED-RV64-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-NEXT:    or a2, a2, a3
+; CHECK-UNALIGNED-RV64-NEXT:    or a0, a4, a0
+; CHECK-UNALIGNED-RV64-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV64-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-ZBB-LABEL: bcmp_size_31:
+; CHECK-UNALIGNED-RV64-ZBB:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ld a2, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ld a3, 8(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ld a4, 16(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ld a0, 23(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ld a5, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ld a6, 8(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ld a7, 16(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ld a1, 23(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    xor a2, a2, a5
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    xor a3, a3, a6
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    xor a4, a4, a7
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    or a2, a2, a3
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    or a0, a4, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-ZBKB-LABEL: bcmp_size_31:
+; CHECK-UNALIGNED-RV64-ZBKB:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ld a2, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ld a3, 8(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ld a4, 16(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ld a0, 23(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ld a5, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ld a6, 8(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ld a7, 16(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ld a1, 23(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    xor a2, a2, a5
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    xor a3, a3, a6
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    xor a4, a4, a7
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    or a2, a2, a3
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    or a0, a4, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-V-LABEL: bcmp_size_31:
+; CHECK-UNALIGNED-RV64-V:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-V-NEXT:    ld a2, 0(a0)
+; CHECK-UNALIGNED-RV64-V-NEXT:    ld a3, 8(a0)
+; CHECK-UNALIGNED-RV64-V-NEXT:    ld a4, 16(a0)
+; CHECK-UNALIGNED-RV64-V-NEXT:    ld a0, 23(a0)
+; CHECK-UNALIGNED-RV64-V-NEXT:    ld a5, 0(a1)
+; CHECK-UNALIGNED-RV64-V-NEXT:    ld a6, 8(a1)
+; CHECK-UNALIGNED-RV64-V-NEXT:    ld a7, 16(a1)
+; CHECK-UNALIGNED-RV64-V-NEXT:    ld a1, 23(a1)
+; CHECK-UNALIGNED-RV64-V-NEXT:    xor a2, a2, a5
+; CHECK-UNALIGNED-RV64-V-NEXT:    xor a3, a3, a6
+; CHECK-UNALIGNED-RV64-V-NEXT:    xor a4, a4, a7
+; CHECK-UNALIGNED-RV64-V-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-V-NEXT:    or a2, a2, a3
+; CHECK-UNALIGNED-RV64-V-NEXT:    or a0, a4, a0
+; CHECK-UNALIGNED-RV64-V-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-V-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV64-V-NEXT:    ret
+entry:
+  %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iXLen 31)
+  ret i32 %bcmp
+}
+
+define i32 @bcmp_size_32(ptr %s1, ptr %s2) nounwind optsize {
+; CHECK-RV32-LABEL: bcmp_size_32:
+; CHECK-RV32:       # %bb.0: # %entry
+; CHECK-RV32-NEXT:    addi sp, sp, -16
+; CHECK-RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT:    li a2, 32
+; CHECK-RV32-NEXT:    call bcmp
+; CHECK-RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    addi sp, sp, 16
+; CHECK-RV32-NEXT:    ret
+;
+; CHECK-ALIGNED-RV64-LABEL: bcmp_size_32:
+; CHECK-ALIGNED-RV64:       # %bb.0: # %entry
+; CHECK-ALIGNED-RV64-NEXT:    lbu a2, 1(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a3, 0(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a4, 2(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a5, 3(a0)
+; CHECK-ALIGNED-RV64-NEXT:    slli a2, a2, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a2, a2, a3
+; CHECK-ALIGNED-RV64-NEXT:    slli a4, a4, 16
+; CHECK-ALIGNED-RV64-NEXT:    slli a5, a5, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a5, a4
+; CHECK-ALIGNED-RV64-NEXT:    lbu a3, 4(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a5, 5(a0)
+; CHECK-ALIGNED-RV64-NEXT:    or a2, a4, a2
+; CHECK-ALIGNED-RV64-NEXT:    lbu a4, 6(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a6, 7(a0)
+; CHECK-ALIGNED-RV64-NEXT:    slli a5, a5, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a3, a5, a3
+; CHECK-ALIGNED-RV64-NEXT:    slli a4, a4, 16
+; CHECK-ALIGNED-RV64-NEXT:    slli a6, a6, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a6, a4
+; CHECK-ALIGNED-RV64-NEXT:    or a3, a4, a3
+; CHECK-ALIGNED-RV64-NEXT:    slli a3, a3, 32
+; CHECK-ALIGNED-RV64-NEXT:    lbu a4, 0(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a5, 1(a1)
+; CHECK-ALIGNED-RV64-NEXT:    or a2, a3, a2
+; CHECK-ALIGNED-RV64-NEXT:    lbu a3, 2(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a6, 3(a1)
+; CHECK-ALIGNED-RV64-NEXT:    slli a5, a5, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a5, a4
+; CHECK-ALIGNED-RV64-NEXT:    slli a3, a3, 16
+; CHECK-ALIGNED-RV64-NEXT:    slli a6, a6, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a3, a6, a3
+; CHECK-ALIGNED-RV64-NEXT:    lbu a5, 4(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a6, 5(a1)
+; CHECK-ALIGNED-RV64-NEXT:    or a3, a3, a4
+; CHECK-ALIGNED-RV64-NEXT:    lbu a4, 6(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a7, 7(a1)
+; CHECK-ALIGNED-RV64-NEXT:    slli a6, a6, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a5, a6, a5
+; CHECK-ALIGNED-RV64-NEXT:    slli a4, a4, 16
+; CHECK-ALIGNED-RV64-NEXT:    slli a7, a7, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a7, a4
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a4, a5
+; CHECK-ALIGNED-RV64-NEXT:    slli a4, a4, 32
+; CHECK-ALIGNED-RV64-NEXT:    or a3, a4, a3
+; CHECK-ALIGNED-RV64-NEXT:    lbu a4, 8(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a5, 9(a0)
+; CHECK-ALIGNED-RV64-NEXT:    xor a2, a2, a3
+; CHECK-ALIGNED-RV64-NEXT:    lbu a3, 10(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a6, 11(a0)
+; CHECK-ALIGNED-RV64-NEXT:    slli a5, a5, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a5, a4
+; CHECK-ALIGNED-RV64-NEXT:    slli a3, a3, 16
+; CHECK-ALIGNED-RV64-NEXT:    slli a6, a6, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a3, a6, a3
+; CHECK-ALIGNED-RV64-NEXT:    lbu a5, 12(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a6, 13(a0)
+; CHECK-ALIGNED-RV64-NEXT:    or a3, a3, a4
+; CHECK-ALIGNED-RV64-NEXT:    lbu a4, 14(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a7, 15(a0)
+; CHECK-ALIGNED-RV64-NEXT:    slli a6, a6, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a5, a6, a5
+; CHECK-ALIGNED-RV64-NEXT:    slli a4, a4, 16
+; CHECK-ALIGNED-RV64-NEXT:    slli a7, a7, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a7, a4
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a4, a5
+; CHECK-ALIGNED-RV64-NEXT:    slli a4, a4, 32
+; CHECK-ALIGNED-RV64-NEXT:    lbu a5, 8(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a6, 9(a1)
+; CHECK-ALIGNED-RV64-NEXT:    or a3, a4, a3
+; CHECK-ALIGNED-RV64-NEXT:    lbu a4, 10(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a7, 11(a1)
+; CHECK-ALIGNED-RV64-NEXT:    slli a6, a6, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a5, a6, a5
+; CHECK-ALIGNED-RV64-NEXT:    slli a4, a4, 16
+; CHECK-ALIGNED-RV64-NEXT:    slli a7, a7, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a7, a4
+; CHECK-ALIGNED-RV64-NEXT:    lbu a6, 12(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a7, 13(a1)
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a4, a5
+; CHECK-ALIGNED-RV64-NEXT:    lbu a5, 14(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu t0, 15(a1)
+; CHECK-ALIGNED-RV64-NEXT:    slli a7, a7, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a6, a7, a6
+; CHECK-ALIGNED-RV64-NEXT:    slli a5, a5, 16
+; CHECK-ALIGNED-RV64-NEXT:    slli t0, t0, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a5, t0, a5
+; CHECK-ALIGNED-RV64-NEXT:    or a5, a5, a6
+; CHECK-ALIGNED-RV64-NEXT:    slli a5, a5, 32
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a5, a4
+; CHECK-ALIGNED-RV64-NEXT:    lbu a5, 16(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a6, 17(a0)
+; CHECK-ALIGNED-RV64-NEXT:    xor a3, a3, a4
+; CHECK-ALIGNED-RV64-NEXT:    lbu a4, 18(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a7, 19(a0)
+; CHECK-ALIGNED-RV64-NEXT:    slli a6, a6, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a5, a6, a5
+; CHECK-ALIGNED-RV64-NEXT:    slli a4, a4, 16
+; CHECK-ALIGNED-RV64-NEXT:    slli a7, a7, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a7, a4
+; CHECK-ALIGNED-RV64-NEXT:    lbu a6, 20(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a7, 21(a0)
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a4, a5
+; CHECK-ALIGNED-RV64-NEXT:    lbu a5, 22(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu t0, 23(a0)
+; CHECK-ALIGNED-RV64-NEXT:    slli a7, a7, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a6, a7, a6
+; CHECK-ALIGNED-RV64-NEXT:    slli a5, a5, 16
+; CHECK-ALIGNED-RV64-NEXT:    slli t0, t0, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a5, t0, a5
+; CHECK-ALIGNED-RV64-NEXT:    or a5, a5, a6
+; CHECK-ALIGNED-RV64-NEXT:    slli a5, a5, 32
+; CHECK-ALIGNED-RV64-NEXT:    lbu a6, 16(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a7, 17(a1)
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a5, a4
+; CHECK-ALIGNED-RV64-NEXT:    lbu a5, 18(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu t0, 19(a1)
+; CHECK-ALIGNED-RV64-NEXT:    slli a7, a7, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a6, a7, a6
+; CHECK-ALIGNED-RV64-NEXT:    slli a5, a5, 16
+; CHECK-ALIGNED-RV64-NEXT:    slli t0, t0, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a5, t0, a5
+; CHECK-ALIGNED-RV64-NEXT:    lbu a7, 20(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu t0, 21(a1)
+; CHECK-ALIGNED-RV64-NEXT:    or a5, a5, a6
+; CHECK-ALIGNED-RV64-NEXT:    lbu a6, 22(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu t1, 23(a1)
+; CHECK-ALIGNED-RV64-NEXT:    slli t0, t0, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a7, t0, a7
+; CHECK-ALIGNED-RV64-NEXT:    slli a6, a6, 16
+; CHECK-ALIGNED-RV64-NEXT:    slli t1, t1, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a6, t1, a6
+; CHECK-ALIGNED-RV64-NEXT:    or a6, a6, a7
+; CHECK-ALIGNED-RV64-NEXT:    slli a6, a6, 32
+; CHECK-ALIGNED-RV64-NEXT:    or a5, a6, a5
+; CHECK-ALIGNED-RV64-NEXT:    lbu a6, 24(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a7, 25(a0)
+; CHECK-ALIGNED-RV64-NEXT:    xor a4, a4, a5
+; CHECK-ALIGNED-RV64-NEXT:    lbu a5, 26(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu t0, 27(a0)
+; CHECK-ALIGNED-RV64-NEXT:    slli a7, a7, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a6, a7, a6
+; CHECK-ALIGNED-RV64-NEXT:    slli a5, a5, 16
+; CHECK-ALIGNED-RV64-NEXT:    slli t0, t0, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a5, t0, a5
+; CHECK-ALIGNED-RV64-NEXT:    lbu a7, 28(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu t0, 29(a0)
+; CHECK-ALIGNED-RV64-NEXT:    or a5, a5, a6
+; CHECK-ALIGNED-RV64-NEXT:    lbu a6, 30(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a0, 31(a0)
+; CHECK-ALIGNED-RV64-NEXT:    slli t0, t0, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a7, t0, a7
+; CHECK-ALIGNED-RV64-NEXT:    slli a6, a6, 16
+; CHECK-ALIGNED-RV64-NEXT:    slli a0, a0, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a0, a0, a6
+; CHECK-ALIGNED-RV64-NEXT:    or a0, a0, a7
+; CHECK-ALIGNED-RV64-NEXT:    slli a0, a0, 32
+; CHECK-ALIGNED-RV64-NEXT:    lbu a6, 24(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a7, 25(a1)
+; CHECK-ALIGNED-RV64-NEXT:    or a0, a0, a5
+; CHECK-ALIGNED-RV64-NEXT:    lbu a5, 26(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu t0, 27(a1)
+; CHECK-ALIGNED-RV64-NEXT:    slli a7, a7, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a6, a7, a6
+; CHECK-ALIGNED-RV64-NEXT:    slli a5, a5, 16
+; CHECK-ALIGNED-RV64-NEXT:    slli t0, t0, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a5, t0, a5
+; CHECK-ALIGNED-RV64-NEXT:    lbu a7, 28(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu t0, 29(a1)
+; CHECK-ALIGNED-RV64-NEXT:    or a5, a5, a6
+; CHECK-ALIGNED-RV64-NEXT:    lbu a6, 30(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a1, 31(a1)
+; CHECK-ALIGNED-RV64-NEXT:    slli t0, t0, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a7, t0, a7
+; CHECK-ALIGNED-RV64-NEXT:    slli a6, a6, 16
+; CHECK-ALIGNED-RV64-NEXT:    slli a1, a1, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a1, a1, a6
+; CHECK-ALIGNED-RV64-NEXT:    or a1, a1, a7
+; CHECK-ALIGNED-RV64-NEXT:    slli a1, a1, 32
+; CHECK-ALIGNED-RV64-NEXT:    or a1, a1, a5
+; CHECK-ALIGNED-RV64-NEXT:    xor a0, a0, a1
+; CHECK-ALIGNED-RV64-NEXT:    or a2, a2, a3
+; CHECK-ALIGNED-RV64-NEXT:    or a0, a4, a0
+; CHECK-ALIGNED-RV64-NEXT:    or a0, a2, a0
+; CHECK-ALIGNED-RV64-NEXT:    snez a0, a0
+; CHECK-ALIGNED-RV64-NEXT:    ret
+;
+; CHECK-ALIGNED-RV64-ZBB-LABEL: bcmp_size_32:
+; CHECK-ALIGNED-RV64-ZBB:       # %bb.0: # %entry
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a2, 1(a0)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a3, 0(a0)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a4, 2(a0)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a5, 3(a0)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a2, a2, 8
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a2, a2, a3
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a4, a4, 16
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a5, a5, 24
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a4, a5, a4
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a3, 4(a0)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a5, 5(a0)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a2, a4, a2
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a4, 6(a0)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a6, 7(a0)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a5, a5, 8
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a3, a5, a3
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a4, a4, 16
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a6, a6, 24
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a4, a6, a4
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a3, a4, a3
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a3, a3, 32
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a4, 0(a1)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a5, 1(a1)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a2, a3, a2
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a3, 2(a1)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a6, 3(a1)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a5, a5, 8
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a4, a5, a4
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a3, a3, 16
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a6, a6, 24
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a3, a6, a3
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a5, 4(a1)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a6, 5(a1)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a3, a3, a4
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a4, 6(a1)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a7, 7(a1)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a6, a6, 8
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a5, a6, a5
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a4, a4, 16
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a7, a7, 24
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a4, a7, a4
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a4, a4, a5
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a4, a4, 32
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a3, a4, a3
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a4, 8(a0)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a5, 9(a0)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    xor a2, a2, a3
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a3, 10(a0)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a6, 11(a0)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a5, a5, 8
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a4, a5, a4
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a3, a3, 16
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a6, a6, 24
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a3, a6, a3
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a5, 12(a0)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a6, 13(a0)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a3, a3, a4
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a4, 14(a0)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a7, 15(a0)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a6, a6, 8
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a5, a6, a5
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a4, a4, 16
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a7, a7, 24
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a4, a7, a4
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a4, a4, a5
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a4, a4, 32
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a5, 8(a1)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a6, 9(a1)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a3, a4, a3
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a4, 10(a1)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a7, 11(a1)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a6, a6, 8
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a5, a6, a5
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a4, a4, 16
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a7, a7, 24
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a4, a7, a4
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a6, 12(a1)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a7, 13(a1)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a4, a4, a5
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a5, 14(a1)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu t0, 15(a1)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a7, a7, 8
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a6, a7, a6
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a5, a5, 16
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli t0, t0, 24
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a5, t0, a5
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a5, a5, a6
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a5, a5, 32
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a4, a5, a4
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a5, 16(a0)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a6, 17(a0)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    xor a3, a3, a4
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a4, 18(a0)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a7, 19(a0)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a6, a6, 8
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a5, a6, a5
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a4, a4, 16
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a7, a7, 24
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a4, a7, a4
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a6, 20(a0)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a7, 21(a0)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a4, a4, a5
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a5, 22(a0)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu t0, 23(a0)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a7, a7, 8
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a6, a7, a6
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a5, a5, 16
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli t0, t0, 24
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a5, t0, a5
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a5, a5, a6
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a5, a5, 32
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a6, 16(a1)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a7, 17(a1)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a4, a5, a4
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a5, 18(a1)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu t0, 19(a1)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a7, a7, 8
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a6, a7, a6
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a5, a5, 16
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli t0, t0, 24
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a5, t0, a5
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a7, 20(a1)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu t0, 21(a1)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a5, a5, a6
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a6, 22(a1)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu t1, 23(a1)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli t0, t0, 8
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a7, t0, a7
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a6, a6, 16
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli t1, t1, 24
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a6, t1, a6
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a6, a6, a7
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a6, a6, 32
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a5, a6, a5
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a6, 24(a0)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a7, 25(a0)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    xor a4, a4, a5
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a5, 26(a0)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu t0, 27(a0)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a7, a7, 8
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a6, a7, a6
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a5, a5, 16
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli t0, t0, 24
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a5, t0, a5
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a7, 28(a0)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu t0, 29(a0)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a5, a5, a6
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a6, 30(a0)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a0, 31(a0)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli t0, t0, 8
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a7, t0, a7
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a6, a6, 16
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a0, a0, 24
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a0, a0, a6
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a0, a0, a7
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a0, a0, 32
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a6, 24(a1)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a7, 25(a1)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a0, a0, a5
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a5, 26(a1)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu t0, 27(a1)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a7, a7, 8
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a6, a7, a6
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a5, a5, 16
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli t0, t0, 24
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a5, t0, a5
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a7, 28(a1)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu t0, 29(a1)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a5, a5, a6
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a6, 30(a1)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a1, 31(a1)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli t0, t0, 8
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a7, t0, a7
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a6, a6, 16
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a1, a1, 24
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a1, a1, a6
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a1, a1, a7
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a1, a1, 32
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a1, a1, a5
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    xor a0, a0, a1
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a2, a2, a3
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a0, a4, a0
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a0, a2, a0
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    snez a0, a0
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    ret
+;
+; CHECK-ALIGNED-RV64-ZBKB-LABEL: bcmp_size_32:
+; CHECK-ALIGNED-RV64-ZBKB:       # %bb.0: # %entry
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu a2, 4(a0)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu a3, 5(a0)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu a4, 6(a0)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu a5, 7(a0)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    packh a2, a2, a3
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    packh a3, a4, a5
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu a4, 0(a0)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu a5, 1(a0)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu a6, 2(a0)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu a7, 3(a0)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    slli a3, a3, 16
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    or a2, a3, a2
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    packh a3, a4, a5
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    packh a4, a6, a7
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    slli a4, a4, 16
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu a5, 4(a1)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu a6, 5(a1)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu a7, 6(a1)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu t0, 7(a1)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    or a3, a4, a3
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    pack a2, a3, a2
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    packh a3, a5, a6
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    packh a4, a7, t0
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu a5, 0(a1)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu a6, 1(a1)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu a7, 2(a1)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu t0, 3(a1)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    slli a4, a4, 16
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    or a3, a4, a3
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    packh a4, a5, a6
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    packh a5, a7, t0
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    slli a5, a5, 16
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    or a4, a5, a4
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu a5, 12(a0)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu a6, 13(a0)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu a7, 14(a0)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu t0, 15(a0)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    pack a3, a4, a3
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    xor a2, a2, a3
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    packh a3, a5, a6
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    packh a4, a7, t0
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu a5, 8(a0)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu a6, 9(a0)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu a7, 10(a0)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu t0, 11(a0)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    slli a4, a4, 16
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    or a3, a4, a3
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    packh a4, a5, a6
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    packh a5, a7, t0
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    slli a5, a5, 16
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu a6, 12(a1)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu a7, 13(a1)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu t0, 14(a1)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu t1, 15(a1)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    or a4, a5, a4
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    pack a3, a4, a3
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    packh a4, a6, a7
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    packh a5, t0, t1
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu a6, 8(a1)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu a7, 9(a1)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu t0, 10(a1)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu t1, 11(a1)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    slli a5, a5, 16
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    or a4, a5, a4
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    packh a5, a6, a7
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    packh a6, t0, t1
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    slli a6, a6, 16
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    or a5, a6, a5
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu a6, 20(a0)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu a7, 21(a0)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu t0, 22(a0)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu t1, 23(a0)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    pack a4, a5, a4
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    xor a3, a3, a4
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    packh a4, a6, a7
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    packh a5, t0, t1
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu a6, 16(a0)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu a7, 17(a0)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu t0, 18(a0)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu t1, 19(a0)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    slli a5, a5, 16
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    or a4, a5, a4
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    packh a5, a6, a7
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    packh a6, t0, t1
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    slli a6, a6, 16
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu a7, 20(a1)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu t0, 21(a1)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu t1, 22(a1)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu t2, 23(a1)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    or a5, a6, a5
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    pack a4, a5, a4
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    packh a5, a7, t0
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    packh a6, t1, t2
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu a7, 16(a1)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu t0, 17(a1)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu t1, 18(a1)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu t2, 19(a1)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    slli a6, a6, 16
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    or a5, a6, a5
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    packh a6, a7, t0
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    packh a7, t1, t2
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    slli a7, a7, 16
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    or a6, a7, a6
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu a7, 28(a0)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu t0, 29(a0)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu t1, 30(a0)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu t2, 31(a0)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    pack a5, a6, a5
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    xor a4, a4, a5
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    packh a5, a7, t0
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    packh a6, t1, t2
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu a7, 24(a0)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu t0, 25(a0)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu t1, 26(a0)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu a0, 27(a0)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    slli a6, a6, 16
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    or a5, a6, a5
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    packh a6, a7, t0
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    packh a0, t1, a0
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    slli a0, a0, 16
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu a7, 28(a1)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu t0, 29(a1)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu t1, 30(a1)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu t2, 31(a1)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    or a0, a0, a6
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    pack a0, a0, a5
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    packh a5, a7, t0
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    packh a6, t1, t2
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu a7, 24(a1)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu t0, 25(a1)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu t1, 26(a1)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu a1, 27(a1)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    slli a6, a6, 16
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    or a5, a6, a5
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    packh a6, a7, t0
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    packh a1, t1, a1
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    slli a1, a1, 16
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    or a1, a1, a6
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    pack a1, a1, a5
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    xor a0, a0, a1
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    or a2, a2, a3
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    or a0, a4, a0
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    or a0, a2, a0
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    snez a0, a0
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    ret
+;
+; CHECK-ALIGNED-RV64-V-LABEL: bcmp_size_32:
+; CHECK-ALIGNED-RV64-V:       # %bb.0: # %entry
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a2, 1(a0)
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a3, 0(a0)
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a4, 2(a0)
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a5, 3(a0)
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a2, a2, 8
+; CHECK-ALIGNED-RV64-V-NEXT:    or a2, a2, a3
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a4, a4, 16
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a5, a5, 24
+; CHECK-ALIGNED-RV64-V-NEXT:    or a4, a5, a4
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a3, 4(a0)
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a5, 5(a0)
+; CHECK-ALIGNED-RV64-V-NEXT:    or a2, a4, a2
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a4, 6(a0)
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a6, 7(a0)
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a5, a5, 8
+; CHECK-ALIGNED-RV64-V-NEXT:    or a3, a5, a3
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a4, a4, 16
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a6, a6, 24
+; CHECK-ALIGNED-RV64-V-NEXT:    or a4, a6, a4
+; CHECK-ALIGNED-RV64-V-NEXT:    or a3, a4, a3
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a3, a3, 32
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a4, 0(a1)
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a5, 1(a1)
+; CHECK-ALIGNED-RV64-V-NEXT:    or a2, a3, a2
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a3, 2(a1)
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a6, 3(a1)
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a5, a5, 8
+; CHECK-ALIGNED-RV64-V-NEXT:    or a4, a5, a4
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a3, a3, 16
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a6, a6, 24
+; CHECK-ALIGNED-RV64-V-NEXT:    or a3, a6, a3
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a5, 4(a1)
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a6, 5(a1)
+; CHECK-ALIGNED-RV64-V-NEXT:    or a3, a3, a4
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a4, 6(a1)
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a7, 7(a1)
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a6, a6, 8
+; CHECK-ALIGNED-RV64-V-NEXT:    or a5, a6, a5
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a4, a4, 16
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a7, a7, 24
+; CHECK-ALIGNED-RV64-V-NEXT:    or a4, a7, a4
+; CHECK-ALIGNED-RV64-V-NEXT:    or a4, a4, a5
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a4, a4, 32
+; CHECK-ALIGNED-RV64-V-NEXT:    or a3, a4, a3
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a4, 8(a0)
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a5, 9(a0)
+; CHECK-ALIGNED-RV64-V-NEXT:    xor a2, a2, a3
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a3, 10(a0)
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a6, 11(a0)
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a5, a5, 8
+; CHECK-ALIGNED-RV64-V-NEXT:    or a4, a5, a4
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a3, a3, 16
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a6, a6, 24
+; CHECK-ALIGNED-RV64-V-NEXT:    or a3, a6, a3
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a5, 12(a0)
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a6, 13(a0)
+; CHECK-ALIGNED-RV64-V-NEXT:    or a3, a3, a4
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a4, 14(a0)
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a7, 15(a0)
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a6, a6, 8
+; CHECK-ALIGNED-RV64-V-NEXT:    or a5, a6, a5
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a4, a4, 16
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a7, a7, 24
+; CHECK-ALIGNED-RV64-V-NEXT:    or a4, a7, a4
+; CHECK-ALIGNED-RV64-V-NEXT:    or a4, a4, a5
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a4, a4, 32
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a5, 8(a1)
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a6, 9(a1)
+; CHECK-ALIGNED-RV64-V-NEXT:    or a3, a4, a3
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a4, 10(a1)
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a7, 11(a1)
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a6, a6, 8
+; CHECK-ALIGNED-RV64-V-NEXT:    or a5, a6, a5
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a4, a4, 16
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a7, a7, 24
+; CHECK-ALIGNED-RV64-V-NEXT:    or a4, a7, a4
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a6, 12(a1)
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a7, 13(a1)
+; CHECK-ALIGNED-RV64-V-NEXT:    or a4, a4, a5
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a5, 14(a1)
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu t0, 15(a1)
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a7, a7, 8
+; CHECK-ALIGNED-RV64-V-NEXT:    or a6, a7, a6
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a5, a5, 16
+; CHECK-ALIGNED-RV64-V-NEXT:    slli t0, t0, 24
+; CHECK-ALIGNED-RV64-V-NEXT:    or a5, t0, a5
+; CHECK-ALIGNED-RV64-V-NEXT:    or a5, a5, a6
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a5, a5, 32
+; CHECK-ALIGNED-RV64-V-NEXT:    or a4, a5, a4
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a5, 16(a0)
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a6, 17(a0)
+; CHECK-ALIGNED-RV64-V-NEXT:    xor a3, a3, a4
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a4, 18(a0)
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a7, 19(a0)
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a6, a6, 8
+; CHECK-ALIGNED-RV64-V-NEXT:    or a5, a6, a5
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a4, a4, 16
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a7, a7, 24
+; CHECK-ALIGNED-RV64-V-NEXT:    or a4, a7, a4
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a6, 20(a0)
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a7, 21(a0)
+; CHECK-ALIGNED-RV64-V-NEXT:    or a4, a4, a5
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a5, 22(a0)
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu t0, 23(a0)
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a7, a7, 8
+; CHECK-ALIGNED-RV64-V-NEXT:    or a6, a7, a6
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a5, a5, 16
+; CHECK-ALIGNED-RV64-V-NEXT:    slli t0, t0, 24
+; CHECK-ALIGNED-RV64-V-NEXT:    or a5, t0, a5
+; CHECK-ALIGNED-RV64-V-NEXT:    or a5, a5, a6
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a5, a5, 32
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a6, 16(a1)
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a7, 17(a1)
+; CHECK-ALIGNED-RV64-V-NEXT:    or a4, a5, a4
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a5, 18(a1)
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu t0, 19(a1)
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a7, a7, 8
+; CHECK-ALIGNED-RV64-V-NEXT:    or a6, a7, a6
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a5, a5, 16
+; CHECK-ALIGNED-RV64-V-NEXT:    slli t0, t0, 24
+; CHECK-ALIGNED-RV64-V-NEXT:    or a5, t0, a5
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a7, 20(a1)
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu t0, 21(a1)
+; CHECK-ALIGNED-RV64-V-NEXT:    or a5, a5, a6
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a6, 22(a1)
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu t1, 23(a1)
+; CHECK-ALIGNED-RV64-V-NEXT:    slli t0, t0, 8
+; CHECK-ALIGNED-RV64-V-NEXT:    or a7, t0, a7
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a6, a6, 16
+; CHECK-ALIGNED-RV64-V-NEXT:    slli t1, t1, 24
+; CHECK-ALIGNED-RV64-V-NEXT:    or a6, t1, a6
+; CHECK-ALIGNED-RV64-V-NEXT:    or a6, a6, a7
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a6, a6, 32
+; CHECK-ALIGNED-RV64-V-NEXT:    or a5, a6, a5
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a6, 24(a0)
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a7, 25(a0)
+; CHECK-ALIGNED-RV64-V-NEXT:    xor a4, a4, a5
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a5, 26(a0)
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu t0, 27(a0)
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a7, a7, 8
+; CHECK-ALIGNED-RV64-V-NEXT:    or a6, a7, a6
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a5, a5, 16
+; CHECK-ALIGNED-RV64-V-NEXT:    slli t0, t0, 24
+; CHECK-ALIGNED-RV64-V-NEXT:    or a5, t0, a5
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a7, 28(a0)
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu t0, 29(a0)
+; CHECK-ALIGNED-RV64-V-NEXT:    or a5, a5, a6
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a6, 30(a0)
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a0, 31(a0)
+; CHECK-ALIGNED-RV64-V-NEXT:    slli t0, t0, 8
+; CHECK-ALIGNED-RV64-V-NEXT:    or a7, t0, a7
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a6, a6, 16
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a0, a0, 24
+; CHECK-ALIGNED-RV64-V-NEXT:    or a0, a0, a6
+; CHECK-ALIGNED-RV64-V-NEXT:    or a0, a0, a7
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a0, a0, 32
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a6, 24(a1)
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a7, 25(a1)
+; CHECK-ALIGNED-RV64-V-NEXT:    or a0, a0, a5
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a5, 26(a1)
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu t0, 27(a1)
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a7, a7, 8
+; CHECK-ALIGNED-RV64-V-NEXT:    or a6, a7, a6
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a5, a5, 16
+; CHECK-ALIGNED-RV64-V-NEXT:    slli t0, t0, 24
+; CHECK-ALIGNED-RV64-V-NEXT:    or a5, t0, a5
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a7, 28(a1)
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu t0, 29(a1)
+; CHECK-ALIGNED-RV64-V-NEXT:    or a5, a5, a6
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a6, 30(a1)
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a1, 31(a1)
+; CHECK-ALIGNED-RV64-V-NEXT:    slli t0, t0, 8
+; CHECK-ALIGNED-RV64-V-NEXT:    or a7, t0, a7
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a6, a6, 16
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a1, a1, 24
+; CHECK-ALIGNED-RV64-V-NEXT:    or a1, a1, a6
+; CHECK-ALIGNED-RV64-V-NEXT:    or a1, a1, a7
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a1, a1, 32
+; CHECK-ALIGNED-RV64-V-NEXT:    or a1, a1, a5
+; CHECK-ALIGNED-RV64-V-NEXT:    xor a0, a0, a1
+; CHECK-ALIGNED-RV64-V-NEXT:    or a2, a2, a3
+; CHECK-ALIGNED-RV64-V-NEXT:    or a0, a4, a0
+; CHECK-ALIGNED-RV64-V-NEXT:    or a0, a2, a0
+; CHECK-ALIGNED-RV64-V-NEXT:    snez a0, a0
+; CHECK-ALIGNED-RV64-V-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-LABEL: bcmp_size_32:
+; CHECK-UNALIGNED-RV64:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-NEXT:    ld a2, 0(a0)
+; CHECK-UNALIGNED-RV64-NEXT:    ld a3, 8(a0)
+; CHECK-UNALIGNED-RV64-NEXT:    ld a4, 16(a0)
+; CHECK-UNALIGNED-RV64-NEXT:    ld a0, 24(a0)
+; CHECK-UNALIGNED-RV64-NEXT:    ld a5, 0(a1)
+; CHECK-UNALIGNED-RV64-NEXT:    ld a6, 8(a1)
+; CHECK-UNALIGNED-RV64-NEXT:    ld a7, 16(a1)
+; CHECK-UNALIGNED-RV64-NEXT:    ld a1, 24(a1)
+; CHECK-UNALIGNED-RV64-NEXT:    xor a2, a2, a5
+; CHECK-UNALIGNED-RV64-NEXT:    xor a3, a3, a6
+; CHECK-UNALIGNED-RV64-NEXT:    xor a4, a4, a7
+; CHECK-UNALIGNED-RV64-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-NEXT:    or a2, a2, a3
+; CHECK-UNALIGNED-RV64-NEXT:    or a0, a4, a0
+; CHECK-UNALIGNED-RV64-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV64-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-ZBB-LABEL: bcmp_size_32:
+; CHECK-UNALIGNED-RV64-ZBB:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ld a2, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ld a3, 8(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ld a4, 16(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ld a0, 24(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ld a5, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ld a6, 8(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ld a7, 16(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ld a1, 24(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    xor a2, a2, a5
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    xor a3, a3, a6
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    xor a4, a4, a7
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    or a2, a2, a3
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    or a0, a4, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-ZBKB-LABEL: bcmp_size_32:
+; CHECK-UNALIGNED-RV64-ZBKB:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ld a2, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ld a3, 8(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ld a4, 16(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ld a0, 24(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ld a5, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ld a6, 8(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ld a7, 16(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ld a1, 24(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    xor a2, a2, a5
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    xor a3, a3, a6
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    xor a4, a4, a7
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    or a2, a2, a3
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    or a0, a4, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-V-LABEL: bcmp_size_32:
+; CHECK-UNALIGNED-RV64-V:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-V-NEXT:    ld a2, 0(a0)
+; CHECK-UNALIGNED-RV64-V-NEXT:    ld a3, 8(a0)
+; CHECK-UNALIGNED-RV64-V-NEXT:    ld a4, 16(a0)
+; CHECK-UNALIGNED-RV64-V-NEXT:    ld a0, 24(a0)
+; CHECK-UNALIGNED-RV64-V-NEXT:    ld a5, 0(a1)
+; CHECK-UNALIGNED-RV64-V-NEXT:    ld a6, 8(a1)
+; CHECK-UNALIGNED-RV64-V-NEXT:    ld a7, 16(a1)
+; CHECK-UNALIGNED-RV64-V-NEXT:    ld a1, 24(a1)
+; CHECK-UNALIGNED-RV64-V-NEXT:    xor a2, a2, a5
+; CHECK-UNALIGNED-RV64-V-NEXT:    xor a3, a3, a6
+; CHECK-UNALIGNED-RV64-V-NEXT:    xor a4, a4, a7
+; CHECK-UNALIGNED-RV64-V-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-V-NEXT:    or a2, a2, a3
+; CHECK-UNALIGNED-RV64-V-NEXT:    or a0, a4, a0
+; CHECK-UNALIGNED-RV64-V-NEXT:    or a0, a2, a0
+; CHECK-UNALIGNED-RV64-V-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-RV64-V-NEXT:    ret
+entry:
+  %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iXLen 32)
+  ret i32 %bcmp
+}
+
+define i32 @bcmp_size_63(ptr %s1, ptr %s2) nounwind optsize {
+; CHECK-RV32-LABEL: bcmp_size_63:
+; CHECK-RV32:       # %bb.0: # %entry
+; CHECK-RV32-NEXT:    addi sp, sp, -16
+; CHECK-RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT:    li a2, 63
+; CHECK-RV32-NEXT:    call bcmp
+; CHECK-RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    addi sp, sp, 16
+; CHECK-RV32-NEXT:    ret
+;
+; CHECK-RV64-LABEL: bcmp_size_63:
+; CHECK-RV64:       # %bb.0: # %entry
+; CHECK-RV64-NEXT:    addi sp, sp, -16
+; CHECK-RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-RV64-NEXT:    li a2, 63
+; CHECK-RV64-NEXT:    call bcmp
+; CHECK-RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    addi sp, sp, 16
+; CHECK-RV64-NEXT:    ret
+entry:
+  %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iXLen 63)
+  ret i32 %bcmp
+}
+
+define i32 @bcmp_size_64(ptr %s1, ptr %s2) nounwind optsize {
+; CHECK-RV32-LABEL: bcmp_size_64:
+; CHECK-RV32:       # %bb.0: # %entry
+; CHECK-RV32-NEXT:    addi sp, sp, -16
+; CHECK-RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT:    li a2, 64
+; CHECK-RV32-NEXT:    call bcmp
+; CHECK-RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    addi sp, sp, 16
+; CHECK-RV32-NEXT:    ret
+;
+; CHECK-RV64-LABEL: bcmp_size_64:
+; CHECK-RV64:       # %bb.0: # %entry
+; CHECK-RV64-NEXT:    addi sp, sp, -16
+; CHECK-RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-RV64-NEXT:    li a2, 64
+; CHECK-RV64-NEXT:    call bcmp
+; CHECK-RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    addi sp, sp, 16
+; CHECK-RV64-NEXT:    ret
+entry:
+  %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iXLen 64)
+  ret i32 %bcmp
+}
+
+define i32 @bcmp_size_127(ptr %s1, ptr %s2) nounwind optsize {
+; CHECK-RV32-LABEL: bcmp_size_127:
+; CHECK-RV32:       # %bb.0: # %entry
+; CHECK-RV32-NEXT:    addi sp, sp, -16
+; CHECK-RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT:    li a2, 127
+; CHECK-RV32-NEXT:    call bcmp
+; CHECK-RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    addi sp, sp, 16
+; CHECK-RV32-NEXT:    ret
+;
+; CHECK-RV64-LABEL: bcmp_size_127:
+; CHECK-RV64:       # %bb.0: # %entry
+; CHECK-RV64-NEXT:    addi sp, sp, -16
+; CHECK-RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-RV64-NEXT:    li a2, 127
+; CHECK-RV64-NEXT:    call bcmp
+; CHECK-RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    addi sp, sp, 16
+; CHECK-RV64-NEXT:    ret
+entry:
+  %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iXLen 127)
+  ret i32 %bcmp
+}
+
+define i32 @bcmp_size_128(ptr %s1, ptr %s2) nounwind optsize {
+; CHECK-RV32-LABEL: bcmp_size_128:
+; CHECK-RV32:       # %bb.0: # %entry
+; CHECK-RV32-NEXT:    addi sp, sp, -16
+; CHECK-RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT:    li a2, 128
+; CHECK-RV32-NEXT:    call bcmp
+; CHECK-RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    addi sp, sp, 16
+; CHECK-RV32-NEXT:    ret
+;
+; CHECK-RV64-LABEL: bcmp_size_128:
+; CHECK-RV64:       # %bb.0: # %entry
+; CHECK-RV64-NEXT:    addi sp, sp, -16
+; CHECK-RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-RV64-NEXT:    li a2, 128
+; CHECK-RV64-NEXT:    call bcmp
+; CHECK-RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    addi sp, sp, 16
+; CHECK-RV64-NEXT:    ret
+entry:
+  %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iXLen 128)
+  ret i32 %bcmp
+}
+
+define i32 @bcmp_size_runtime(ptr %s1, ptr %s2, iXLen %len) nounwind optsize {
+; CHECK-RV32-LABEL: bcmp_size_runtime:
+; CHECK-RV32:       # %bb.0: # %entry
+; CHECK-RV32-NEXT:    addi sp, sp, -16
+; CHECK-RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; CHECK-RV32-NEXT:    call bcmp
 ; CHECK-RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; CHECK-RV32-NEXT:    addi sp, sp, 16
 ; CHECK-RV32-NEXT:    ret
 ;
-; CHECK-RV64-LABEL: bcmp_size_31:
-; CHECK-RV64:       # %bb.0: # %entry
-; CHECK-RV64-NEXT:    addi sp, sp, -16
-; CHECK-RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
-; CHECK-RV64-NEXT:    li a2, 31
-; CHECK-RV64-NEXT:    call bcmp
-; CHECK-RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
-; CHECK-RV64-NEXT:    addi sp, sp, 16
-; CHECK-RV64-NEXT:    ret
+; CHECK-RV64-LABEL: bcmp_size_runtime:
+; CHECK-RV64:       # %bb.0: # %entry
+; CHECK-RV64-NEXT:    addi sp, sp, -16
+; CHECK-RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-RV64-NEXT:    call bcmp
+; CHECK-RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    addi sp, sp, 16
+; CHECK-RV64-NEXT:    ret
+entry:
+  %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iXLen %len)
+  ret i32 %bcmp
+}
+
+define i1 @bcmp_eq_zero(ptr %s1, ptr %s2) nounwind optsize {
+; CHECK-ALIGNED-RV32-LABEL: bcmp_eq_zero:
+; CHECK-ALIGNED-RV32:       # %bb.0: # %entry
+; CHECK-ALIGNED-RV32-NEXT:    lbu a2, 1(a0)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a3, 0(a0)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a4, 2(a0)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a0, 3(a0)
+; CHECK-ALIGNED-RV32-NEXT:    slli a2, a2, 8
+; CHECK-ALIGNED-RV32-NEXT:    or a2, a2, a3
+; CHECK-ALIGNED-RV32-NEXT:    slli a4, a4, 16
+; CHECK-ALIGNED-RV32-NEXT:    slli a0, a0, 24
+; CHECK-ALIGNED-RV32-NEXT:    or a0, a0, a4
+; CHECK-ALIGNED-RV32-NEXT:    lbu a3, 0(a1)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a4, 1(a1)
+; CHECK-ALIGNED-RV32-NEXT:    or a0, a0, a2
+; CHECK-ALIGNED-RV32-NEXT:    lbu a2, 2(a1)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a1, 3(a1)
+; CHECK-ALIGNED-RV32-NEXT:    slli a4, a4, 8
+; CHECK-ALIGNED-RV32-NEXT:    or a3, a4, a3
+; CHECK-ALIGNED-RV32-NEXT:    slli a2, a2, 16
+; CHECK-ALIGNED-RV32-NEXT:    slli a1, a1, 24
+; CHECK-ALIGNED-RV32-NEXT:    or a1, a1, a2
+; CHECK-ALIGNED-RV32-NEXT:    or a1, a1, a3
+; CHECK-ALIGNED-RV32-NEXT:    xor a0, a0, a1
+; CHECK-ALIGNED-RV32-NEXT:    seqz a0, a0
+; CHECK-ALIGNED-RV32-NEXT:    ret
+;
+; CHECK-ALIGNED-RV64-LABEL: bcmp_eq_zero:
+; CHECK-ALIGNED-RV64:       # %bb.0: # %entry
+; CHECK-ALIGNED-RV64-NEXT:    lbu a2, 1(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a3, 0(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a4, 2(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lb a0, 3(a0)
+; CHECK-ALIGNED-RV64-NEXT:    slli a2, a2, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a2, a2, a3
+; CHECK-ALIGNED-RV64-NEXT:    slli a4, a4, 16
+; CHECK-ALIGNED-RV64-NEXT:    slli a0, a0, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a0, a0, a4
+; CHECK-ALIGNED-RV64-NEXT:    lbu a3, 0(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a4, 1(a1)
+; CHECK-ALIGNED-RV64-NEXT:    or a0, a0, a2
+; CHECK-ALIGNED-RV64-NEXT:    lbu a2, 2(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lb a1, 3(a1)
+; CHECK-ALIGNED-RV64-NEXT:    slli a4, a4, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a3, a4, a3
+; CHECK-ALIGNED-RV64-NEXT:    slli a2, a2, 16
+; CHECK-ALIGNED-RV64-NEXT:    slli a1, a1, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a1, a1, a2
+; CHECK-ALIGNED-RV64-NEXT:    or a1, a1, a3
+; CHECK-ALIGNED-RV64-NEXT:    xor a0, a0, a1
+; CHECK-ALIGNED-RV64-NEXT:    seqz a0, a0
+; CHECK-ALIGNED-RV64-NEXT:    ret
+;
+; CHECK-ALIGNED-RV32-ZBB-LABEL: bcmp_eq_zero:
+; CHECK-ALIGNED-RV32-ZBB:       # %bb.0: # %entry
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    lbu a2, 1(a0)
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    lbu a3, 0(a0)
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    lbu a4, 2(a0)
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    lbu a0, 3(a0)
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    slli a2, a2, 8
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    or a2, a2, a3
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    slli a4, a4, 16
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    slli a0, a0, 24
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    or a0, a0, a4
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    lbu a3, 0(a1)
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    lbu a4, 1(a1)
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    or a0, a0, a2
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    lbu a2, 2(a1)
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    lbu a1, 3(a1)
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    slli a4, a4, 8
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    or a3, a4, a3
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    slli a2, a2, 16
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    slli a1, a1, 24
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    or a1, a1, a2
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    or a1, a1, a3
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    xor a0, a0, a1
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    seqz a0, a0
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    ret
+;
+; CHECK-ALIGNED-RV64-ZBB-LABEL: bcmp_eq_zero:
+; CHECK-ALIGNED-RV64-ZBB:       # %bb.0: # %entry
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a2, 1(a0)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a3, 0(a0)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a4, 2(a0)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lb a0, 3(a0)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a2, a2, 8
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a2, a2, a3
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a4, a4, 16
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a0, a0, 24
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a0, a0, a4
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a3, 0(a1)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a4, 1(a1)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a0, a0, a2
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a2, 2(a1)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lb a1, 3(a1)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a4, a4, 8
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a3, a4, a3
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a2, a2, 16
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a1, a1, 24
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a1, a1, a2
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a1, a1, a3
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    xor a0, a0, a1
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    seqz a0, a0
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    ret
+;
+; CHECK-ALIGNED-RV32-ZBKB-LABEL: bcmp_eq_zero:
+; CHECK-ALIGNED-RV32-ZBKB:       # %bb.0: # %entry
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    lbu a2, 0(a0)
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    lbu a3, 1(a0)
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    lbu a4, 2(a0)
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    lbu a0, 3(a0)
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    lbu a5, 0(a1)
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    lbu a6, 1(a1)
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    lbu a7, 2(a1)
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    lbu a1, 3(a1)
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    packh a0, a4, a0
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    packh a2, a2, a3
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    pack a0, a2, a0
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    packh a1, a7, a1
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    packh a2, a5, a6
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    pack a1, a2, a1
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    xor a0, a0, a1
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    seqz a0, a0
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    ret
+;
+; CHECK-ALIGNED-RV64-ZBKB-LABEL: bcmp_eq_zero:
+; CHECK-ALIGNED-RV64-ZBKB:       # %bb.0: # %entry
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu a2, 0(a0)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu a3, 1(a0)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu a4, 2(a0)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lb a0, 3(a0)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    packh a2, a2, a3
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    slli a4, a4, 16
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    slli a0, a0, 24
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    or a0, a0, a4
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu a3, 0(a1)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu a4, 1(a1)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu a5, 2(a1)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lb a1, 3(a1)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    or a0, a0, a2
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    packh a2, a3, a4
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    slli a5, a5, 16
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    slli a1, a1, 24
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    or a1, a1, a5
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    or a1, a1, a2
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    xor a0, a0, a1
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    seqz a0, a0
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    ret
+;
+; CHECK-ALIGNED-RV32-V-LABEL: bcmp_eq_zero:
+; CHECK-ALIGNED-RV32-V:       # %bb.0: # %entry
+; CHECK-ALIGNED-RV32-V-NEXT:    lbu a2, 1(a0)
+; CHECK-ALIGNED-RV32-V-NEXT:    lbu a3, 0(a0)
+; CHECK-ALIGNED-RV32-V-NEXT:    lbu a4, 2(a0)
+; CHECK-ALIGNED-RV32-V-NEXT:    lbu a0, 3(a0)
+; CHECK-ALIGNED-RV32-V-NEXT:    slli a2, a2, 8
+; CHECK-ALIGNED-RV32-V-NEXT:    or a2, a2, a3
+; CHECK-ALIGNED-RV32-V-NEXT:    slli a4, a4, 16
+; CHECK-ALIGNED-RV32-V-NEXT:    slli a0, a0, 24
+; CHECK-ALIGNED-RV32-V-NEXT:    or a0, a0, a4
+; CHECK-ALIGNED-RV32-V-NEXT:    lbu a3, 0(a1)
+; CHECK-ALIGNED-RV32-V-NEXT:    lbu a4, 1(a1)
+; CHECK-ALIGNED-RV32-V-NEXT:    or a0, a0, a2
+; CHECK-ALIGNED-RV32-V-NEXT:    lbu a2, 2(a1)
+; CHECK-ALIGNED-RV32-V-NEXT:    lbu a1, 3(a1)
+; CHECK-ALIGNED-RV32-V-NEXT:    slli a4, a4, 8
+; CHECK-ALIGNED-RV32-V-NEXT:    or a3, a4, a3
+; CHECK-ALIGNED-RV32-V-NEXT:    slli a2, a2, 16
+; CHECK-ALIGNED-RV32-V-NEXT:    slli a1, a1, 24
+; CHECK-ALIGNED-RV32-V-NEXT:    or a1, a1, a2
+; CHECK-ALIGNED-RV32-V-NEXT:    or a1, a1, a3
+; CHECK-ALIGNED-RV32-V-NEXT:    xor a0, a0, a1
+; CHECK-ALIGNED-RV32-V-NEXT:    seqz a0, a0
+; CHECK-ALIGNED-RV32-V-NEXT:    ret
+;
+; CHECK-ALIGNED-RV64-V-LABEL: bcmp_eq_zero:
+; CHECK-ALIGNED-RV64-V:       # %bb.0: # %entry
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a2, 1(a0)
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a3, 0(a0)
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a4, 2(a0)
+; CHECK-ALIGNED-RV64-V-NEXT:    lb a0, 3(a0)
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a2, a2, 8
+; CHECK-ALIGNED-RV64-V-NEXT:    or a2, a2, a3
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a4, a4, 16
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a0, a0, 24
+; CHECK-ALIGNED-RV64-V-NEXT:    or a0, a0, a4
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a3, 0(a1)
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a4, 1(a1)
+; CHECK-ALIGNED-RV64-V-NEXT:    or a0, a0, a2
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a2, 2(a1)
+; CHECK-ALIGNED-RV64-V-NEXT:    lb a1, 3(a1)
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a4, a4, 8
+; CHECK-ALIGNED-RV64-V-NEXT:    or a3, a4, a3
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a2, a2, 16
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a1, a1, 24
+; CHECK-ALIGNED-RV64-V-NEXT:    or a1, a1, a2
+; CHECK-ALIGNED-RV64-V-NEXT:    or a1, a1, a3
+; CHECK-ALIGNED-RV64-V-NEXT:    xor a0, a0, a1
+; CHECK-ALIGNED-RV64-V-NEXT:    seqz a0, a0
+; CHECK-ALIGNED-RV64-V-NEXT:    ret
+;
+; CHECK-UNALIGNED-LABEL: bcmp_eq_zero:
+; CHECK-UNALIGNED:       # %bb.0: # %entry
+; CHECK-UNALIGNED-NEXT:    lw a0, 0(a0)
+; CHECK-UNALIGNED-NEXT:    lw a1, 0(a1)
+; CHECK-UNALIGNED-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-NEXT:    seqz a0, a0
+; CHECK-UNALIGNED-NEXT:    ret
+entry:
+  %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iXLen 4)
+  %ret = icmp eq i32 %bcmp, 0
+  ret i1 %ret
+}
+
+define i1 @bcmp_lt_zero(ptr %s1, ptr %s2) nounwind optsize {
+; CHECK-LABEL: bcmp_lt_zero:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    li a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iXLen 4)
+  %ret = icmp slt i32 %bcmp, 0
+  ret i1 %ret
+}
+
+define i1 @bcmp_gt_zero(ptr %s1, ptr %s2) nounwind optsize {
+; CHECK-ALIGNED-RV32-LABEL: bcmp_gt_zero:
+; CHECK-ALIGNED-RV32:       # %bb.0: # %entry
+; CHECK-ALIGNED-RV32-NEXT:    lbu a2, 1(a0)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a3, 0(a0)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a4, 2(a0)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a0, 3(a0)
+; CHECK-ALIGNED-RV32-NEXT:    slli a2, a2, 8
+; CHECK-ALIGNED-RV32-NEXT:    or a2, a2, a3
+; CHECK-ALIGNED-RV32-NEXT:    slli a4, a4, 16
+; CHECK-ALIGNED-RV32-NEXT:    slli a0, a0, 24
+; CHECK-ALIGNED-RV32-NEXT:    or a0, a0, a4
+; CHECK-ALIGNED-RV32-NEXT:    lbu a3, 0(a1)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a4, 1(a1)
+; CHECK-ALIGNED-RV32-NEXT:    or a0, a0, a2
+; CHECK-ALIGNED-RV32-NEXT:    lbu a2, 2(a1)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a1, 3(a1)
+; CHECK-ALIGNED-RV32-NEXT:    slli a4, a4, 8
+; CHECK-ALIGNED-RV32-NEXT:    or a3, a4, a3
+; CHECK-ALIGNED-RV32-NEXT:    slli a2, a2, 16
+; CHECK-ALIGNED-RV32-NEXT:    slli a1, a1, 24
+; CHECK-ALIGNED-RV32-NEXT:    or a1, a1, a2
+; CHECK-ALIGNED-RV32-NEXT:    or a1, a1, a3
+; CHECK-ALIGNED-RV32-NEXT:    xor a0, a0, a1
+; CHECK-ALIGNED-RV32-NEXT:    snez a0, a0
+; CHECK-ALIGNED-RV32-NEXT:    ret
+;
+; CHECK-ALIGNED-RV64-LABEL: bcmp_gt_zero:
+; CHECK-ALIGNED-RV64:       # %bb.0: # %entry
+; CHECK-ALIGNED-RV64-NEXT:    lbu a2, 1(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a3, 0(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a4, 2(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lb a0, 3(a0)
+; CHECK-ALIGNED-RV64-NEXT:    slli a2, a2, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a2, a2, a3
+; CHECK-ALIGNED-RV64-NEXT:    slli a4, a4, 16
+; CHECK-ALIGNED-RV64-NEXT:    slli a0, a0, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a0, a0, a4
+; CHECK-ALIGNED-RV64-NEXT:    lbu a3, 0(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a4, 1(a1)
+; CHECK-ALIGNED-RV64-NEXT:    or a0, a0, a2
+; CHECK-ALIGNED-RV64-NEXT:    lbu a2, 2(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lb a1, 3(a1)
+; CHECK-ALIGNED-RV64-NEXT:    slli a4, a4, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a3, a4, a3
+; CHECK-ALIGNED-RV64-NEXT:    slli a2, a2, 16
+; CHECK-ALIGNED-RV64-NEXT:    slli a1, a1, 24
+; CHECK-ALIGNED-RV64-NEXT:    or a1, a1, a2
+; CHECK-ALIGNED-RV64-NEXT:    or a1, a1, a3
+; CHECK-ALIGNED-RV64-NEXT:    xor a0, a0, a1
+; CHECK-ALIGNED-RV64-NEXT:    snez a0, a0
+; CHECK-ALIGNED-RV64-NEXT:    ret
+;
+; CHECK-ALIGNED-RV32-ZBB-LABEL: bcmp_gt_zero:
+; CHECK-ALIGNED-RV32-ZBB:       # %bb.0: # %entry
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    lbu a2, 1(a0)
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    lbu a3, 0(a0)
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    lbu a4, 2(a0)
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    lbu a0, 3(a0)
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    slli a2, a2, 8
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    or a2, a2, a3
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    slli a4, a4, 16
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    slli a0, a0, 24
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    or a0, a0, a4
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    lbu a3, 0(a1)
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    lbu a4, 1(a1)
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    or a0, a0, a2
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    lbu a2, 2(a1)
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    lbu a1, 3(a1)
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    slli a4, a4, 8
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    or a3, a4, a3
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    slli a2, a2, 16
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    slli a1, a1, 24
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    or a1, a1, a2
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    or a1, a1, a3
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    xor a0, a0, a1
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    snez a0, a0
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    ret
+;
+; CHECK-ALIGNED-RV64-ZBB-LABEL: bcmp_gt_zero:
+; CHECK-ALIGNED-RV64-ZBB:       # %bb.0: # %entry
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a2, 1(a0)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a3, 0(a0)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a4, 2(a0)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lb a0, 3(a0)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a2, a2, 8
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a2, a2, a3
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a4, a4, 16
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a0, a0, 24
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a0, a0, a4
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a3, 0(a1)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a4, 1(a1)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a0, a0, a2
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a2, 2(a1)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lb a1, 3(a1)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a4, a4, 8
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a3, a4, a3
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a2, a2, 16
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a1, a1, 24
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a1, a1, a2
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a1, a1, a3
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    xor a0, a0, a1
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    snez a0, a0
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    ret
+;
+; CHECK-ALIGNED-RV32-ZBKB-LABEL: bcmp_gt_zero:
+; CHECK-ALIGNED-RV32-ZBKB:       # %bb.0: # %entry
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    lbu a2, 0(a0)
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    lbu a3, 1(a0)
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    lbu a4, 2(a0)
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    lbu a0, 3(a0)
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    lbu a5, 0(a1)
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    lbu a6, 1(a1)
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    lbu a7, 2(a1)
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    lbu a1, 3(a1)
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    packh a0, a4, a0
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    packh a2, a2, a3
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    pack a0, a2, a0
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    packh a1, a7, a1
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    packh a2, a5, a6
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    pack a1, a2, a1
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    xor a0, a0, a1
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    snez a0, a0
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    ret
+;
+; CHECK-ALIGNED-RV64-ZBKB-LABEL: bcmp_gt_zero:
+; CHECK-ALIGNED-RV64-ZBKB:       # %bb.0: # %entry
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu a2, 0(a0)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu a3, 1(a0)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu a4, 2(a0)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lb a0, 3(a0)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    packh a2, a2, a3
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    slli a4, a4, 16
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    slli a0, a0, 24
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    or a0, a0, a4
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu a3, 0(a1)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu a4, 1(a1)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu a5, 2(a1)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lb a1, 3(a1)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    or a0, a0, a2
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    packh a2, a3, a4
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    slli a5, a5, 16
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    slli a1, a1, 24
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    or a1, a1, a5
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    or a1, a1, a2
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    xor a0, a0, a1
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    snez a0, a0
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    ret
+;
+; CHECK-ALIGNED-RV32-V-LABEL: bcmp_gt_zero:
+; CHECK-ALIGNED-RV32-V:       # %bb.0: # %entry
+; CHECK-ALIGNED-RV32-V-NEXT:    lbu a2, 1(a0)
+; CHECK-ALIGNED-RV32-V-NEXT:    lbu a3, 0(a0)
+; CHECK-ALIGNED-RV32-V-NEXT:    lbu a4, 2(a0)
+; CHECK-ALIGNED-RV32-V-NEXT:    lbu a0, 3(a0)
+; CHECK-ALIGNED-RV32-V-NEXT:    slli a2, a2, 8
+; CHECK-ALIGNED-RV32-V-NEXT:    or a2, a2, a3
+; CHECK-ALIGNED-RV32-V-NEXT:    slli a4, a4, 16
+; CHECK-ALIGNED-RV32-V-NEXT:    slli a0, a0, 24
+; CHECK-ALIGNED-RV32-V-NEXT:    or a0, a0, a4
+; CHECK-ALIGNED-RV32-V-NEXT:    lbu a3, 0(a1)
+; CHECK-ALIGNED-RV32-V-NEXT:    lbu a4, 1(a1)
+; CHECK-ALIGNED-RV32-V-NEXT:    or a0, a0, a2
+; CHECK-ALIGNED-RV32-V-NEXT:    lbu a2, 2(a1)
+; CHECK-ALIGNED-RV32-V-NEXT:    lbu a1, 3(a1)
+; CHECK-ALIGNED-RV32-V-NEXT:    slli a4, a4, 8
+; CHECK-ALIGNED-RV32-V-NEXT:    or a3, a4, a3
+; CHECK-ALIGNED-RV32-V-NEXT:    slli a2, a2, 16
+; CHECK-ALIGNED-RV32-V-NEXT:    slli a1, a1, 24
+; CHECK-ALIGNED-RV32-V-NEXT:    or a1, a1, a2
+; CHECK-ALIGNED-RV32-V-NEXT:    or a1, a1, a3
+; CHECK-ALIGNED-RV32-V-NEXT:    xor a0, a0, a1
+; CHECK-ALIGNED-RV32-V-NEXT:    snez a0, a0
+; CHECK-ALIGNED-RV32-V-NEXT:    ret
+;
+; CHECK-ALIGNED-RV64-V-LABEL: bcmp_gt_zero:
+; CHECK-ALIGNED-RV64-V:       # %bb.0: # %entry
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a2, 1(a0)
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a3, 0(a0)
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a4, 2(a0)
+; CHECK-ALIGNED-RV64-V-NEXT:    lb a0, 3(a0)
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a2, a2, 8
+; CHECK-ALIGNED-RV64-V-NEXT:    or a2, a2, a3
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a4, a4, 16
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a0, a0, 24
+; CHECK-ALIGNED-RV64-V-NEXT:    or a0, a0, a4
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a3, 0(a1)
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a4, 1(a1)
+; CHECK-ALIGNED-RV64-V-NEXT:    or a0, a0, a2
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a2, 2(a1)
+; CHECK-ALIGNED-RV64-V-NEXT:    lb a1, 3(a1)
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a4, a4, 8
+; CHECK-ALIGNED-RV64-V-NEXT:    or a3, a4, a3
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a2, a2, 16
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a1, a1, 24
+; CHECK-ALIGNED-RV64-V-NEXT:    or a1, a1, a2
+; CHECK-ALIGNED-RV64-V-NEXT:    or a1, a1, a3
+; CHECK-ALIGNED-RV64-V-NEXT:    xor a0, a0, a1
+; CHECK-ALIGNED-RV64-V-NEXT:    snez a0, a0
+; CHECK-ALIGNED-RV64-V-NEXT:    ret
+;
+; CHECK-UNALIGNED-LABEL: bcmp_gt_zero:
+; CHECK-UNALIGNED:       # %bb.0: # %entry
+; CHECK-UNALIGNED-NEXT:    lw a0, 0(a0)
+; CHECK-UNALIGNED-NEXT:    lw a1, 0(a1)
+; CHECK-UNALIGNED-NEXT:    xor a0, a0, a1
+; CHECK-UNALIGNED-NEXT:    snez a0, a0
+; CHECK-UNALIGNED-NEXT:    ret
+entry:
+  %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iXLen 4)
+  %ret = icmp sgt i32 %bcmp, 0
+  ret i1 %ret
+}
+
+define i32 @memcmp_size_0(ptr %s1, ptr %s2) nounwind optsize {
+; CHECK-LABEL: memcmp_size_0:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    li a0, 0
+; CHECK-NEXT:    ret
 entry:
-  %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iXLen 31)
-  ret i32 %bcmp
+  %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iXLen 0)
+  ret i32 %memcmp
 }
 
-define i32 @bcmp_size_32(ptr %s1, ptr %s2) nounwind optsize {
-; CHECK-RV32-LABEL: bcmp_size_32:
-; CHECK-RV32:       # %bb.0: # %entry
-; CHECK-RV32-NEXT:    addi sp, sp, -16
-; CHECK-RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; CHECK-RV32-NEXT:    li a2, 32
-; CHECK-RV32-NEXT:    call bcmp
-; CHECK-RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    addi sp, sp, 16
-; CHECK-RV32-NEXT:    ret
+define i32 @memcmp_size_1(ptr %s1, ptr %s2) nounwind optsize {
+; CHECK-LABEL: memcmp_size_1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lbu a0, 0(a0)
+; CHECK-NEXT:    lbu a1, 0(a1)
+; CHECK-NEXT:    sub a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iXLen 1)
+  ret i32 %memcmp
+}
+
+define i32 @memcmp_size_2(ptr %s1, ptr %s2) nounwind optsize {
+; CHECK-ALIGNED-RV32-LABEL: memcmp_size_2:
+; CHECK-ALIGNED-RV32:       # %bb.0: # %entry
+; CHECK-ALIGNED-RV32-NEXT:    lbu a2, 0(a0)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a0, 1(a0)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a3, 0(a1)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a1, 1(a1)
+; CHECK-ALIGNED-RV32-NEXT:    slli a2, a2, 8
+; CHECK-ALIGNED-RV32-NEXT:    or a0, a2, a0
+; CHECK-ALIGNED-RV32-NEXT:    slli a3, a3, 8
+; CHECK-ALIGNED-RV32-NEXT:    or a1, a3, a1
+; CHECK-ALIGNED-RV32-NEXT:    sub a0, a0, a1
+; CHECK-ALIGNED-RV32-NEXT:    ret
 ;
-; CHECK-RV64-LABEL: bcmp_size_32:
-; CHECK-RV64:       # %bb.0: # %entry
-; CHECK-RV64-NEXT:    addi sp, sp, -16
-; CHECK-RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
-; CHECK-RV64-NEXT:    li a2, 32
-; CHECK-RV64-NEXT:    call bcmp
-; CHECK-RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
-; CHECK-RV64-NEXT:    addi sp, sp, 16
-; CHECK-RV64-NEXT:    ret
+; CHECK-ALIGNED-RV64-LABEL: memcmp_size_2:
+; CHECK-ALIGNED-RV64:       # %bb.0: # %entry
+; CHECK-ALIGNED-RV64-NEXT:    lbu a2, 0(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a0, 1(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a3, 0(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a1, 1(a1)
+; CHECK-ALIGNED-RV64-NEXT:    slli a2, a2, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a0, a2, a0
+; CHECK-ALIGNED-RV64-NEXT:    slli a3, a3, 8
+; CHECK-ALIGNED-RV64-NEXT:    or a1, a3, a1
+; CHECK-ALIGNED-RV64-NEXT:    sub a0, a0, a1
+; CHECK-ALIGNED-RV64-NEXT:    ret
+;
+; CHECK-ALIGNED-RV32-ZBB-LABEL: memcmp_size_2:
+; CHECK-ALIGNED-RV32-ZBB:       # %bb.0: # %entry
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    lbu a2, 1(a0)
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    lbu a0, 0(a0)
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    lbu a3, 1(a1)
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    lbu a1, 0(a1)
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    slli a2, a2, 8
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    or a0, a2, a0
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    slli a3, a3, 8
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    or a1, a3, a1
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    rev8 a0, a0
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    srli a0, a0, 16
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    rev8 a1, a1
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    srli a1, a1, 16
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    sub a0, a0, a1
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    ret
+;
+; CHECK-ALIGNED-RV64-ZBB-LABEL: memcmp_size_2:
+; CHECK-ALIGNED-RV64-ZBB:       # %bb.0: # %entry
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a2, 1(a0)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a0, 0(a0)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a3, 1(a1)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a1, 0(a1)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a2, a2, 8
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a0, a2, a0
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a3, a3, 8
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a1, a3, a1
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    rev8 a0, a0
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    srli a0, a0, 48
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    rev8 a1, a1
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    srli a1, a1, 48
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    sub a0, a0, a1
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    ret
+;
+; CHECK-ALIGNED-RV32-ZBKB-LABEL: memcmp_size_2:
+; CHECK-ALIGNED-RV32-ZBKB:       # %bb.0: # %entry
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    lbu a2, 1(a0)
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    lbu a0, 0(a0)
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    lbu a3, 1(a1)
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    lbu a1, 0(a1)
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    slli a2, a2, 8
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    or a0, a2, a0
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    slli a3, a3, 8
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    or a1, a3, a1
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    rev8 a0, a0
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    srli a0, a0, 16
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    rev8 a1, a1
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    srli a1, a1, 16
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    sub a0, a0, a1
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    ret
+;
+; CHECK-ALIGNED-RV64-ZBKB-LABEL: memcmp_size_2:
+; CHECK-ALIGNED-RV64-ZBKB:       # %bb.0: # %entry
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu a2, 1(a0)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu a0, 0(a0)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu a3, 1(a1)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu a1, 0(a1)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    slli a2, a2, 8
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    or a0, a2, a0
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    slli a3, a3, 8
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    or a1, a3, a1
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    rev8 a0, a0
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    srli a0, a0, 48
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    rev8 a1, a1
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    srli a1, a1, 48
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    sub a0, a0, a1
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    ret
+;
+; CHECK-ALIGNED-RV32-V-LABEL: memcmp_size_2:
+; CHECK-ALIGNED-RV32-V:       # %bb.0: # %entry
+; CHECK-ALIGNED-RV32-V-NEXT:    lbu a2, 0(a0)
+; CHECK-ALIGNED-RV32-V-NEXT:    lbu a0, 1(a0)
+; CHECK-ALIGNED-RV32-V-NEXT:    lbu a3, 0(a1)
+; CHECK-ALIGNED-RV32-V-NEXT:    lbu a1, 1(a1)
+; CHECK-ALIGNED-RV32-V-NEXT:    slli a2, a2, 8
+; CHECK-ALIGNED-RV32-V-NEXT:    or a0, a2, a0
+; CHECK-ALIGNED-RV32-V-NEXT:    slli a3, a3, 8
+; CHECK-ALIGNED-RV32-V-NEXT:    or a1, a3, a1
+; CHECK-ALIGNED-RV32-V-NEXT:    sub a0, a0, a1
+; CHECK-ALIGNED-RV32-V-NEXT:    ret
+;
+; CHECK-ALIGNED-RV64-V-LABEL: memcmp_size_2:
+; CHECK-ALIGNED-RV64-V:       # %bb.0: # %entry
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a2, 0(a0)
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a0, 1(a0)
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a3, 0(a1)
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a1, 1(a1)
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a2, a2, 8
+; CHECK-ALIGNED-RV64-V-NEXT:    or a0, a2, a0
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a3, a3, 8
+; CHECK-ALIGNED-RV64-V-NEXT:    or a1, a3, a1
+; CHECK-ALIGNED-RV64-V-NEXT:    sub a0, a0, a1
+; CHECK-ALIGNED-RV64-V-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV32-LABEL: memcmp_size_2:
+; CHECK-UNALIGNED-RV32:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-NEXT:    lhu a0, 0(a0)
+; CHECK-UNALIGNED-RV32-NEXT:    lhu a1, 0(a1)
+; CHECK-UNALIGNED-RV32-NEXT:    srli a2, a0, 8
+; CHECK-UNALIGNED-RV32-NEXT:    slli a0, a0, 8
+; CHECK-UNALIGNED-RV32-NEXT:    or a0, a0, a2
+; CHECK-UNALIGNED-RV32-NEXT:    srli a2, a1, 8
+; CHECK-UNALIGNED-RV32-NEXT:    slli a1, a1, 8
+; CHECK-UNALIGNED-RV32-NEXT:    or a1, a1, a2
+; CHECK-UNALIGNED-RV32-NEXT:    lui a2, 16
+; CHECK-UNALIGNED-RV32-NEXT:    addi a2, a2, -1
+; CHECK-UNALIGNED-RV32-NEXT:    and a0, a0, a2
+; CHECK-UNALIGNED-RV32-NEXT:    and a1, a1, a2
+; CHECK-UNALIGNED-RV32-NEXT:    sub a0, a0, a1
+; CHECK-UNALIGNED-RV32-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-LABEL: memcmp_size_2:
+; CHECK-UNALIGNED-RV64:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-NEXT:    lhu a0, 0(a0)
+; CHECK-UNALIGNED-RV64-NEXT:    lhu a1, 0(a1)
+; CHECK-UNALIGNED-RV64-NEXT:    srli a2, a0, 8
+; CHECK-UNALIGNED-RV64-NEXT:    slli a0, a0, 8
+; CHECK-UNALIGNED-RV64-NEXT:    or a0, a0, a2
+; CHECK-UNALIGNED-RV64-NEXT:    srli a2, a1, 8
+; CHECK-UNALIGNED-RV64-NEXT:    slli a1, a1, 8
+; CHECK-UNALIGNED-RV64-NEXT:    or a1, a1, a2
+; CHECK-UNALIGNED-RV64-NEXT:    lui a2, 16
+; CHECK-UNALIGNED-RV64-NEXT:    addiw a2, a2, -1
+; CHECK-UNALIGNED-RV64-NEXT:    and a0, a0, a2
+; CHECK-UNALIGNED-RV64-NEXT:    and a1, a1, a2
+; CHECK-UNALIGNED-RV64-NEXT:    sub a0, a0, a1
+; CHECK-UNALIGNED-RV64-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV32-ZBB-LABEL: memcmp_size_2:
+; CHECK-UNALIGNED-RV32-ZBB:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lh a0, 0(a0)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lh a1, 0(a1)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    rev8 a0, a0
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    srli a0, a0, 16
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    rev8 a1, a1
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    srli a1, a1, 16
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    sub a0, a0, a1
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-ZBB-LABEL: memcmp_size_2:
+; CHECK-UNALIGNED-RV64-ZBB:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lh a0, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lh a1, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    rev8 a0, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    srli a0, a0, 48
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    rev8 a1, a1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    srli a1, a1, 48
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    sub a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV32-ZBKB-LABEL: memcmp_size_2:
+; CHECK-UNALIGNED-RV32-ZBKB:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lh a0, 0(a0)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lh a1, 0(a1)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    rev8 a0, a0
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    srli a0, a0, 16
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    rev8 a1, a1
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    srli a1, a1, 16
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    sub a0, a0, a1
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-ZBKB-LABEL: memcmp_size_2:
+; CHECK-UNALIGNED-RV64-ZBKB:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lh a0, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lh a1, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    rev8 a0, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    srli a0, a0, 48
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    rev8 a1, a1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    srli a1, a1, 48
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    sub a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV32-V-LABEL: memcmp_size_2:
+; CHECK-UNALIGNED-RV32-V:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-V-NEXT:    lhu a0, 0(a0)
+; CHECK-UNALIGNED-RV32-V-NEXT:    lhu a1, 0(a1)
+; CHECK-UNALIGNED-RV32-V-NEXT:    srli a2, a0, 8
+; CHECK-UNALIGNED-RV32-V-NEXT:    slli a0, a0, 8
+; CHECK-UNALIGNED-RV32-V-NEXT:    or a0, a0, a2
+; CHECK-UNALIGNED-RV32-V-NEXT:    srli a2, a1, 8
+; CHECK-UNALIGNED-RV32-V-NEXT:    slli a1, a1, 8
+; CHECK-UNALIGNED-RV32-V-NEXT:    or a1, a1, a2
+; CHECK-UNALIGNED-RV32-V-NEXT:    lui a2, 16
+; CHECK-UNALIGNED-RV32-V-NEXT:    addi a2, a2, -1
+; CHECK-UNALIGNED-RV32-V-NEXT:    and a0, a0, a2
+; CHECK-UNALIGNED-RV32-V-NEXT:    and a1, a1, a2
+; CHECK-UNALIGNED-RV32-V-NEXT:    sub a0, a0, a1
+; CHECK-UNALIGNED-RV32-V-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-V-LABEL: memcmp_size_2:
+; CHECK-UNALIGNED-RV64-V:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-V-NEXT:    lhu a0, 0(a0)
+; CHECK-UNALIGNED-RV64-V-NEXT:    lhu a1, 0(a1)
+; CHECK-UNALIGNED-RV64-V-NEXT:    srli a2, a0, 8
+; CHECK-UNALIGNED-RV64-V-NEXT:    slli a0, a0, 8
+; CHECK-UNALIGNED-RV64-V-NEXT:    or a0, a0, a2
+; CHECK-UNALIGNED-RV64-V-NEXT:    srli a2, a1, 8
+; CHECK-UNALIGNED-RV64-V-NEXT:    slli a1, a1, 8
+; CHECK-UNALIGNED-RV64-V-NEXT:    or a1, a1, a2
+; CHECK-UNALIGNED-RV64-V-NEXT:    lui a2, 16
+; CHECK-UNALIGNED-RV64-V-NEXT:    addiw a2, a2, -1
+; CHECK-UNALIGNED-RV64-V-NEXT:    and a0, a0, a2
+; CHECK-UNALIGNED-RV64-V-NEXT:    and a1, a1, a2
+; CHECK-UNALIGNED-RV64-V-NEXT:    sub a0, a0, a1
+; CHECK-UNALIGNED-RV64-V-NEXT:    ret
 entry:
-  %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iXLen 32)
-  ret i32 %bcmp
+  %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iXLen 2)
+  ret i32 %memcmp
 }
 
-define i32 @bcmp_size_63(ptr %s1, ptr %s2) nounwind optsize {
-; CHECK-RV32-LABEL: bcmp_size_63:
-; CHECK-RV32:       # %bb.0: # %entry
-; CHECK-RV32-NEXT:    addi sp, sp, -16
-; CHECK-RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; CHECK-RV32-NEXT:    li a2, 63
-; CHECK-RV32-NEXT:    call bcmp
-; CHECK-RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    addi sp, sp, 16
-; CHECK-RV32-NEXT:    ret
+define i32 @memcmp_size_3(ptr %s1, ptr %s2) nounwind optsize {
+; CHECK-ALIGNED-RV32-LABEL: memcmp_size_3:
+; CHECK-ALIGNED-RV32:       # %bb.0: # %entry
+; CHECK-ALIGNED-RV32-NEXT:    lbu a2, 0(a0)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a3, 1(a0)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a4, 0(a1)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a5, 1(a1)
+; CHECK-ALIGNED-RV32-NEXT:    slli a2, a2, 8
+; CHECK-ALIGNED-RV32-NEXT:    slli a6, a3, 16
+; CHECK-ALIGNED-RV32-NEXT:    or a2, a6, a2
+; CHECK-ALIGNED-RV32-NEXT:    or a2, a2, a3
+; CHECK-ALIGNED-RV32-NEXT:    lui a3, 16
+; CHECK-ALIGNED-RV32-NEXT:    addi a3, a3, -1
+; CHECK-ALIGNED-RV32-NEXT:    and a2, a2, a3
+; CHECK-ALIGNED-RV32-NEXT:    slli a4, a4, 8
+; CHECK-ALIGNED-RV32-NEXT:    slli a6, a5, 16
+; CHECK-ALIGNED-RV32-NEXT:    or a4, a6, a4
+; CHECK-ALIGNED-RV32-NEXT:    or a4, a4, a5
+; CHECK-ALIGNED-RV32-NEXT:    and a3, a4, a3
+; CHECK-ALIGNED-RV32-NEXT:    bne a2, a3, .LBB24_2
+; CHECK-ALIGNED-RV32-NEXT:  # %bb.1: # %loadbb1
+; CHECK-ALIGNED-RV32-NEXT:    lbu a0, 2(a0)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a1, 2(a1)
+; CHECK-ALIGNED-RV32-NEXT:    sub a0, a0, a1
+; CHECK-ALIGNED-RV32-NEXT:    ret
+; CHECK-ALIGNED-RV32-NEXT:  .LBB24_2: # %res_block
+; CHECK-ALIGNED-RV32-NEXT:    sltu a0, a2, a3
+; CHECK-ALIGNED-RV32-NEXT:    neg a0, a0
+; CHECK-ALIGNED-RV32-NEXT:    ori a0, a0, 1
+; CHECK-ALIGNED-RV32-NEXT:    ret
 ;
-; CHECK-RV64-LABEL: bcmp_size_63:
-; CHECK-RV64:       # %bb.0: # %entry
-; CHECK-RV64-NEXT:    addi sp, sp, -16
-; CHECK-RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
-; CHECK-RV64-NEXT:    li a2, 63
-; CHECK-RV64-NEXT:    call bcmp
-; CHECK-RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
-; CHECK-RV64-NEXT:    addi sp, sp, 16
-; CHECK-RV64-NEXT:    ret
+; CHECK-ALIGNED-RV64-LABEL: memcmp_size_3:
+; CHECK-ALIGNED-RV64:       # %bb.0: # %entry
+; CHECK-ALIGNED-RV64-NEXT:    lbu a2, 0(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a3, 1(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a4, 0(a1)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a5, 1(a1)
+; CHECK-ALIGNED-RV64-NEXT:    slli a2, a2, 8
+; CHECK-ALIGNED-RV64-NEXT:    slli a6, a3, 16
+; CHECK-ALIGNED-RV64-NEXT:    or a2, a6, a2
+; CHECK-ALIGNED-RV64-NEXT:    or a2, a2, a3
+; CHECK-ALIGNED-RV64-NEXT:    lui a3, 16
+; CHECK-ALIGNED-RV64-NEXT:    addiw a3, a3, -1
+; CHECK-ALIGNED-RV64-NEXT:    and a2, a2, a3
+; CHECK-ALIGNED-RV64-NEXT:    slli a4, a4, 8
+; CHECK-ALIGNED-RV64-NEXT:    slli a6, a5, 16
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a6, a4
+; CHECK-ALIGNED-RV64-NEXT:    or a4, a4, a5
+; CHECK-ALIGNED-RV64-NEXT:    and a3, a4, a3
+; CHECK-ALIGNED-RV64-NEXT:    bne a2, a3, .LBB24_2
+; CHECK-ALIGNED-RV64-NEXT:  # %bb.1: # %loadbb1
+; CHECK-ALIGNED-RV64-NEXT:    lbu a0, 2(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a1, 2(a1)
+; CHECK-ALIGNED-RV64-NEXT:    sub a0, a0, a1
+; CHECK-ALIGNED-RV64-NEXT:    ret
+; CHECK-ALIGNED-RV64-NEXT:  .LBB24_2: # %res_block
+; CHECK-ALIGNED-RV64-NEXT:    sltu a0, a2, a3
+; CHECK-ALIGNED-RV64-NEXT:    neg a0, a0
+; CHECK-ALIGNED-RV64-NEXT:    ori a0, a0, 1
+; CHECK-ALIGNED-RV64-NEXT:    ret
+;
+; CHECK-ALIGNED-RV32-ZBB-LABEL: memcmp_size_3:
+; CHECK-ALIGNED-RV32-ZBB:       # %bb.0: # %entry
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    lbu a2, 1(a0)
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    lbu a3, 0(a0)
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    lbu a4, 1(a1)
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    lbu a5, 0(a1)
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    slli a2, a2, 8
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    or a2, a2, a3
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    slli a4, a4, 8
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    or a4, a4, a5
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    rev8 a2, a2
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    srli a2, a2, 16
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    rev8 a3, a4
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    srli a3, a3, 16
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    bne a2, a3, .LBB24_2
+; CHECK-ALIGNED-RV32-ZBB-NEXT:  # %bb.1: # %loadbb1
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    lbu a0, 2(a0)
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    lbu a1, 2(a1)
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    sub a0, a0, a1
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    ret
+; CHECK-ALIGNED-RV32-ZBB-NEXT:  .LBB24_2: # %res_block
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    sltu a0, a2, a3
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    neg a0, a0
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    ori a0, a0, 1
+; CHECK-ALIGNED-RV32-ZBB-NEXT:    ret
+;
+; CHECK-ALIGNED-RV64-ZBB-LABEL: memcmp_size_3:
+; CHECK-ALIGNED-RV64-ZBB:       # %bb.0: # %entry
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a2, 1(a0)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a3, 0(a0)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a4, 1(a1)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a5, 0(a1)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a2, a2, 8
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a2, a2, a3
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    slli a4, a4, 8
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    or a4, a4, a5
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    rev8 a2, a2
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    srli a2, a2, 48
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    rev8 a3, a4
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    srli a3, a3, 48
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    bne a2, a3, .LBB24_2
+; CHECK-ALIGNED-RV64-ZBB-NEXT:  # %bb.1: # %loadbb1
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a0, 2(a0)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    lbu a1, 2(a1)
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    sub a0, a0, a1
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    ret
+; CHECK-ALIGNED-RV64-ZBB-NEXT:  .LBB24_2: # %res_block
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    sltu a0, a2, a3
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    neg a0, a0
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    ori a0, a0, 1
+; CHECK-ALIGNED-RV64-ZBB-NEXT:    ret
+;
+; CHECK-ALIGNED-RV32-ZBKB-LABEL: memcmp_size_3:
+; CHECK-ALIGNED-RV32-ZBKB:       # %bb.0: # %entry
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    lbu a2, 1(a0)
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    lbu a3, 0(a0)
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    lbu a4, 1(a1)
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    lbu a5, 0(a1)
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    slli a2, a2, 8
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    or a2, a2, a3
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    slli a4, a4, 8
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    or a4, a4, a5
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    rev8 a2, a2
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    srli a2, a2, 16
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    rev8 a3, a4
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    srli a3, a3, 16
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    bne a2, a3, .LBB24_2
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:  # %bb.1: # %loadbb1
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    lbu a0, 2(a0)
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    lbu a1, 2(a1)
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    sub a0, a0, a1
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    ret
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:  .LBB24_2: # %res_block
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    sltu a0, a2, a3
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    neg a0, a0
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    ori a0, a0, 1
+; CHECK-ALIGNED-RV32-ZBKB-NEXT:    ret
+;
+; CHECK-ALIGNED-RV64-ZBKB-LABEL: memcmp_size_3:
+; CHECK-ALIGNED-RV64-ZBKB:       # %bb.0: # %entry
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu a2, 1(a0)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu a3, 0(a0)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu a4, 1(a1)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu a5, 0(a1)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    slli a2, a2, 8
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    or a2, a2, a3
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    slli a4, a4, 8
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    or a4, a4, a5
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    rev8 a2, a2
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    srli a2, a2, 48
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    rev8 a3, a4
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    srli a3, a3, 48
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    bne a2, a3, .LBB24_2
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:  # %bb.1: # %loadbb1
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu a0, 2(a0)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    lbu a1, 2(a1)
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    sub a0, a0, a1
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    ret
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:  .LBB24_2: # %res_block
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    sltu a0, a2, a3
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    neg a0, a0
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    ori a0, a0, 1
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:    ret
+;
+; CHECK-ALIGNED-RV32-V-LABEL: memcmp_size_3:
+; CHECK-ALIGNED-RV32-V:       # %bb.0: # %entry
+; CHECK-ALIGNED-RV32-V-NEXT:    lbu a2, 0(a0)
+; CHECK-ALIGNED-RV32-V-NEXT:    lbu a3, 1(a0)
+; CHECK-ALIGNED-RV32-V-NEXT:    lbu a4, 0(a1)
+; CHECK-ALIGNED-RV32-V-NEXT:    lbu a5, 1(a1)
+; CHECK-ALIGNED-RV32-V-NEXT:    slli a2, a2, 8
+; CHECK-ALIGNED-RV32-V-NEXT:    slli a6, a3, 16
+; CHECK-ALIGNED-RV32-V-NEXT:    or a2, a6, a2
+; CHECK-ALIGNED-RV32-V-NEXT:    or a2, a2, a3
+; CHECK-ALIGNED-RV32-V-NEXT:    lui a3, 16
+; CHECK-ALIGNED-RV32-V-NEXT:    addi a3, a3, -1
+; CHECK-ALIGNED-RV32-V-NEXT:    and a2, a2, a3
+; CHECK-ALIGNED-RV32-V-NEXT:    slli a4, a4, 8
+; CHECK-ALIGNED-RV32-V-NEXT:    slli a6, a5, 16
+; CHECK-ALIGNED-RV32-V-NEXT:    or a4, a6, a4
+; CHECK-ALIGNED-RV32-V-NEXT:    or a4, a4, a5
+; CHECK-ALIGNED-RV32-V-NEXT:    and a3, a4, a3
+; CHECK-ALIGNED-RV32-V-NEXT:    bne a2, a3, .LBB24_2
+; CHECK-ALIGNED-RV32-V-NEXT:  # %bb.1: # %loadbb1
+; CHECK-ALIGNED-RV32-V-NEXT:    lbu a0, 2(a0)
+; CHECK-ALIGNED-RV32-V-NEXT:    lbu a1, 2(a1)
+; CHECK-ALIGNED-RV32-V-NEXT:    sub a0, a0, a1
+; CHECK-ALIGNED-RV32-V-NEXT:    ret
+; CHECK-ALIGNED-RV32-V-NEXT:  .LBB24_2: # %res_block
+; CHECK-ALIGNED-RV32-V-NEXT:    sltu a0, a2, a3
+; CHECK-ALIGNED-RV32-V-NEXT:    neg a0, a0
+; CHECK-ALIGNED-RV32-V-NEXT:    ori a0, a0, 1
+; CHECK-ALIGNED-RV32-V-NEXT:    ret
+;
+; CHECK-ALIGNED-RV64-V-LABEL: memcmp_size_3:
+; CHECK-ALIGNED-RV64-V:       # %bb.0: # %entry
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a2, 0(a0)
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a3, 1(a0)
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a4, 0(a1)
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a5, 1(a1)
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a2, a2, 8
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a6, a3, 16
+; CHECK-ALIGNED-RV64-V-NEXT:    or a2, a6, a2
+; CHECK-ALIGNED-RV64-V-NEXT:    or a2, a2, a3
+; CHECK-ALIGNED-RV64-V-NEXT:    lui a3, 16
+; CHECK-ALIGNED-RV64-V-NEXT:    addiw a3, a3, -1
+; CHECK-ALIGNED-RV64-V-NEXT:    and a2, a2, a3
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a4, a4, 8
+; CHECK-ALIGNED-RV64-V-NEXT:    slli a6, a5, 16
+; CHECK-ALIGNED-RV64-V-NEXT:    or a4, a6, a4
+; CHECK-ALIGNED-RV64-V-NEXT:    or a4, a4, a5
+; CHECK-ALIGNED-RV64-V-NEXT:    and a3, a4, a3
+; CHECK-ALIGNED-RV64-V-NEXT:    bne a2, a3, .LBB24_2
+; CHECK-ALIGNED-RV64-V-NEXT:  # %bb.1: # %loadbb1
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a0, 2(a0)
+; CHECK-ALIGNED-RV64-V-NEXT:    lbu a1, 2(a1)
+; CHECK-ALIGNED-RV64-V-NEXT:    sub a0, a0, a1
+; CHECK-ALIGNED-RV64-V-NEXT:    ret
+; CHECK-ALIGNED-RV64-V-NEXT:  .LBB24_2: # %res_block
+; CHECK-ALIGNED-RV64-V-NEXT:    sltu a0, a2, a3
+; CHECK-ALIGNED-RV64-V-NEXT:    neg a0, a0
+; CHECK-ALIGNED-RV64-V-NEXT:    ori a0, a0, 1
+; CHECK-ALIGNED-RV64-V-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV32-LABEL: memcmp_size_3:
+; CHECK-UNALIGNED-RV32:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-NEXT:    lhu a2, 0(a0)
+; CHECK-UNALIGNED-RV32-NEXT:    lhu a3, 0(a1)
+; CHECK-UNALIGNED-RV32-NEXT:    srli a4, a2, 8
+; CHECK-UNALIGNED-RV32-NEXT:    slli a2, a2, 8
+; CHECK-UNALIGNED-RV32-NEXT:    or a2, a2, a4
+; CHECK-UNALIGNED-RV32-NEXT:    lui a4, 16
+; CHECK-UNALIGNED-RV32-NEXT:    addi a4, a4, -1
+; CHECK-UNALIGNED-RV32-NEXT:    and a2, a2, a4
+; CHECK-UNALIGNED-RV32-NEXT:    srli a5, a3, 8
+; CHECK-UNALIGNED-RV32-NEXT:    slli a3, a3, 8
+; CHECK-UNALIGNED-RV32-NEXT:    or a3, a3, a5
+; CHECK-UNALIGNED-RV32-NEXT:    and a3, a3, a4
+; CHECK-UNALIGNED-RV32-NEXT:    bne a2, a3, .LBB24_2
+; CHECK-UNALIGNED-RV32-NEXT:  # %bb.1: # %loadbb1
+; CHECK-UNALIGNED-RV32-NEXT:    lbu a0, 2(a0)
+; CHECK-UNALIGNED-RV32-NEXT:    lbu a1, 2(a1)
+; CHECK-UNALIGNED-RV32-NEXT:    sub a0, a0, a1
+; CHECK-UNALIGNED-RV32-NEXT:    ret
+; CHECK-UNALIGNED-RV32-NEXT:  .LBB24_2: # %res_block
+; CHECK-UNALIGNED-RV32-NEXT:    sltu a0, a2, a3
+; CHECK-UNALIGNED-RV32-NEXT:    neg a0, a0
+; CHECK-UNALIGNED-RV32-NEXT:    ori a0, a0, 1
+; CHECK-UNALIGNED-RV32-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-LABEL: memcmp_size_3:
+; CHECK-UNALIGNED-RV64:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-NEXT:    lhu a2, 0(a0)
+; CHECK-UNALIGNED-RV64-NEXT:    lhu a3, 0(a1)
+; CHECK-UNALIGNED-RV64-NEXT:    srli a4, a2, 8
+; CHECK-UNALIGNED-RV64-NEXT:    slli a2, a2, 8
+; CHECK-UNALIGNED-RV64-NEXT:    or a2, a2, a4
+; CHECK-UNALIGNED-RV64-NEXT:    lui a4, 16
+; CHECK-UNALIGNED-RV64-NEXT:    addiw a4, a4, -1
+; CHECK-UNALIGNED-RV64-NEXT:    and a2, a2, a4
+; CHECK-UNALIGNED-RV64-NEXT:    srli a5, a3, 8
+; CHECK-UNALIGNED-RV64-NEXT:    slli a3, a3, 8
+; CHECK-UNALIGNED-RV64-NEXT:    or a3, a3, a5
+; CHECK-UNALIGNED-RV64-NEXT:    and a3, a3, a4
+; CHECK-UNALIGNED-RV64-NEXT:    bne a2, a3, .LBB24_2
+; CHECK-UNALIGNED-RV64-NEXT:  # %bb.1: # %loadbb1
+; CHECK-UNALIGNED-RV64-NEXT:    lbu a0, 2(a0)
+; CHECK-UNALIGNED-RV64-NEXT:    lbu a1, 2(a1)
+; CHECK-UNALIGNED-RV64-NEXT:    sub a0, a0, a1
+; CHECK-UNALIGNED-RV64-NEXT:    ret
+; CHECK-UNALIGNED-RV64-NEXT:  .LBB24_2: # %res_block
+; CHECK-UNALIGNED-RV64-NEXT:    sltu a0, a2, a3
+; CHECK-UNALIGNED-RV64-NEXT:    neg a0, a0
+; CHECK-UNALIGNED-RV64-NEXT:    ori a0, a0, 1
+; CHECK-UNALIGNED-RV64-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV32-ZBB-LABEL: memcmp_size_3:
+; CHECK-UNALIGNED-RV32-ZBB:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lh a2, 0(a0)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lh a3, 0(a1)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    rev8 a2, a2
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    srli a2, a2, 16
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    rev8 a3, a3
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    srli a3, a3, 16
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    bne a2, a3, .LBB24_2
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:  # %bb.1: # %loadbb1
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lbu a0, 2(a0)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    lbu a1, 2(a1)
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    sub a0, a0, a1
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    ret
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:  .LBB24_2: # %res_block
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    sltu a0, a2, a3
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    neg a0, a0
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    ori a0, a0, 1
+; CHECK-UNALIGNED-RV32-ZBB-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-ZBB-LABEL: memcmp_size_3:
+; CHECK-UNALIGNED-RV64-ZBB:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lh a2, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lh a3, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    rev8 a2, a2
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    srli a2, a2, 48
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    rev8 a3, a3
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    srli a3, a3, 48
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    bne a2, a3, .LBB24_2
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:  # %bb.1: # %loadbb1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lbu a0, 2(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    lbu a1, 2(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    sub a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ret
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:  .LBB24_2: # %res_block
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    sltu a0, a2, a3
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    neg a0, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ori a0, a0, 1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV32-ZBKB-LABEL: memcmp_size_3:
+; CHECK-UNALIGNED-RV32-ZBKB:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lh a2, 0(a0)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lh a3, 0(a1)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    rev8 a2, a2
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    srli a2, a2, 16
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    rev8 a3, a3
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    srli a3, a3, 16
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    bne a2, a3, .LBB24_2
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:  # %bb.1: # %loadbb1
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lbu a0, 2(a0)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    lbu a1, 2(a1)
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    sub a0, a0, a1
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    ret
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:  .LBB24_2: # %res_block
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    sltu a0, a2, a3
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    neg a0, a0
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    ori a0, a0, 1
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-ZBKB-LABEL: memcmp_size_3:
+; CHECK-UNALIGNED-RV64-ZBKB:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lh a2, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lh a3, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    rev8 a2, a2
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    srli a2, a2, 48
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    rev8 a3, a3
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    srli a3, a3, 48
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    bne a2, a3, .LBB24_2
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:  # %bb.1: # %loadbb1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lbu a0, 2(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    lbu a1, 2(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    sub a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ret
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:  .LBB24_2: # %res_block
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    sltu a0, a2, a3
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    neg a0, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ori a0, a0, 1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV32-V-LABEL: memcmp_size_3:
+; CHECK-UNALIGNED-RV32-V:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV32-V-NEXT:    lhu a2, 0(a0)
+; CHECK-UNALIGNED-RV32-V-NEXT:    lhu a3, 0(a1)
+; CHECK-UNALIGNED-RV32-V-NEXT:    srli a4, a2, 8
+; CHECK-UNALIGNED-RV32-V-NEXT:    slli a2, a2, 8
+; CHECK-UNALIGNED-RV32-V-NEXT:    or a2, a2, a4
+; CHECK-UNALIGNED-RV32-V-NEXT:    lui a4, 16
+; CHECK-UNALIGNED-RV32-V-NEXT:    addi a4, a4, -1
+; CHECK-UNALIGNED-RV32-V-NEXT:    and a2, a2, a4
+; CHECK-UNALIGNED-RV32-V-NEXT:    srli a5, a3, 8
+; CHECK-UNALIGNED-RV32-V-NEXT:    slli a3, a3, 8
+; CHECK-UNALIGNED-RV32-V-NEXT:    or a3, a3, a5
+; CHECK-UNALIGNED-RV32-V-NEXT:    and a3, a3, a4
+; CHECK-UNALIGNED-RV32-V-NEXT:    bne a2, a3, .LBB24_2
+; CHECK-UNALIGNED-RV32-V-NEXT:  # %bb.1: # %loadbb1
+; CHECK-UNALIGNED-RV32-V-NEXT:    lbu a0, 2(a0)
+; CHECK-UNALIGNED-RV32-V-NEXT:    lbu a1, 2(a1)
+; CHECK-UNALIGNED-RV32-V-NEXT:    sub a0, a0, a1
+; CHECK-UNALIGNED-RV32-V-NEXT:    ret
+; CHECK-UNALIGNED-RV32-V-NEXT:  .LBB24_2: # %res_block
+; CHECK-UNALIGNED-RV32-V-NEXT:    sltu a0, a2, a3
+; CHECK-UNALIGNED-RV32-V-NEXT:    neg a0, a0
+; CHECK-UNALIGNED-RV32-V-NEXT:    ori a0, a0, 1
+; CHECK-UNALIGNED-RV32-V-NEXT:    ret
+;
+; CHECK-UNALIGNED-RV64-V-LABEL: memcmp_size_3:
+; CHECK-UNALIGNED-RV64-V:       # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-V-NEXT:    lhu a2, 0(a0)
+; CHECK-UNALIGNED-RV64-V-NEXT:    lhu a3, 0(a1)
+; CHECK-UNALIGNED-RV64-V-NEXT:    srli a4, a2, 8
+; CHECK-UNALIGNED-RV64-V-NEXT:    slli a2, a2, 8
+; CHECK-UNALIGNED-RV64-V-NEXT:    or a2, a2, a4
+; CHECK-UNALIGNED-RV64-V-NEXT:    lui a4, 16
+; CHECK-UNALIGNED-RV64-V-NEXT:    addiw a4, a4, -1
+; CHECK-UNALIGNED-RV64-V-NEXT:    and a2, a2, a4
+; CHECK-UNALIGNED-RV64-V-NEXT:    srli a5, a3, 8
+; CHECK-UNALIGNED-RV64-V-NEXT:    slli a3, a3, 8
+; CHECK-UNALIGNED-RV64-V-NEXT:    or a3, a3, a5
+; CHECK-UNALIGNED-RV64-V-NEXT:    and a3, a3, a4
+; CHECK-UNALIGNED-RV64-V-NEXT:    bne a2, a3, .LBB24_2
+; CHECK-UNALIGNED-RV64-V-NEXT:  # %bb.1: # %loadbb1
+; CHECK-UNALIGNED-RV64-V-NEXT:    lbu a0, 2(a0)
+; CHECK-UNALIGNED-RV64-V-NEXT:    lbu a1, 2(a1)
+; CHECK-UNALIGNED-RV64-V-NEXT:    sub a0, a0, a1
+; CHECK-UNALIGNED-RV64-V-NEXT:    ret
+; CHECK-UNALIGNED-RV64-V-NEXT:  .LBB24_2: # %res_block
+; CHECK-UNALIGNED-RV64-V-NEXT:    sltu a0, a2, a3
+; CHECK-UNALIGNED-RV64-V-NEXT:    neg a0, a0
+; CHECK-UNALIGNED-RV64-V-NEXT:    ori a0, a0, 1
+; CHECK-UNALIGNED-RV64-V-NEXT:    ret
 entry:
-  %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iXLen 63)
-  ret i32 %bcmp
+  %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iXLen 3)
+  ret i32 %memcmp
 }
 
-define i32 @bcmp_size_64(ptr %s1, ptr %s2) nounwind optsize {
-; CHECK-RV32-LABEL: bcmp_size_64:
-; CHECK-RV32:       # %bb.0: # %entry
-; CHECK-RV32-NEXT:    addi sp, sp, -16
-; CHECK-RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; CHECK-RV32-NEXT:    li a2, 64
-; CHECK-RV32-NEXT:    call bcmp
-; CHECK-RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    addi sp, sp, 16
-; CHECK-RV32-NEXT:    ret
+define i32 @memcmp_size_4(ptr %s1, ptr %s2) nounwind optsize {
+; CHECK-ALIGNED-RV32-LABEL: memcmp_size_4:
+; CHECK-ALIGNED-RV32:       # %bb.0: # %entry
+; CHECK-ALIGNED-RV32-NEXT:    lbu a2, 0(a0)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a3, 1(a0)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a4, 2(a0)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a0, 3(a0)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a5, 0(a1)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a6, 1(a1)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a7, 2(a1)
+; CHECK-ALIGNED-RV32-NEXT:    lbu a1, 3(a1)
+; CHECK-ALIGNED-RV32-NEXT:    slli a4, a4, 8
+; CHECK-ALIGNED-RV32-NEXT:    or a0, a4, a0
+; CHECK-ALIGNED-RV32-NEXT:    slli a3, a3, 16
+; CHECK-ALIGNED-RV32-NEXT:    slli a2, a2, 24
+; CHECK-ALIGNED-RV32-NEXT:    or a2, a2, a3
+; CHECK-ALIGNED-RV32-NEXT:    or a0, a2, a0
+; CHECK-ALIGNED-RV32-NEXT:    slli a7, a7, 8
+; CHECK-ALIGNED-RV32-NEXT:    or a1, a7, a1
+; CHECK-ALIGNED-RV32-NEXT:    slli a6, a6, 16
+; CHECK-ALIGNED-RV32-NEXT:    slli a5, a5, 24
+; CHECK-ALIGNED-RV32-NEXT:    or a2, a5, a6
+; CHECK-ALIGNED-RV32-NEXT:    or a1, a2, a1
+; CHECK-ALIGNED-RV32-NEXT:    sltu a2, a1, a0
+; CHECK-ALIGNED-RV32-NEXT:    sltu a0, a0, a1
+; CHECK-ALIGNED-RV32-NEXT:    sub a0, a2, a0
+; CHECK-ALIGNED-RV32-NEXT:    ret
+;
+; CHECK-ALIGNED-RV64-LABEL: memcmp_size_4:
+; CHECK-ALIGNED-RV64:       # %bb.0: # %entry
+; CHECK-ALIGNED-RV64-NEXT:    lbu a2, 0(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a3, 1(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lbu a4, 2(a0)
+; CHECK-ALIGNED-RV64-NEXT:    lb a0, 3(a0)
----------------
wangpc-pp wrote:

It seems to be related to https://reviews.llvm.org/D130397. Maybe we should revert that change or add a pattern to catch this case.

https://github.com/llvm/llvm-project/pull/107548
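
For context, the load-combine shape these checks exercise can be reduced to a small standalone IR function. This is a hypothetical reproducer distilled from the CHECK lines above, not code from the patch: four byte loads are zero-extended and or'ed together at shifts of 8/16/24, and in the RV64 checks above the byte that ends up shifted by 24 is loaded with a sign-extending lb (lbu on RV32), which is the behavior the comment relates to D130397:

; Hypothetical reproducer; the function and value names are illustrative only.
define i32 @combine_bytes(ptr %p) {
entry:
  ; Load four consecutive bytes.
  %b0 = load i8, ptr %p, align 1
  %p1 = getelementptr inbounds i8, ptr %p, i64 1
  %b1 = load i8, ptr %p1, align 1
  %p2 = getelementptr inbounds i8, ptr %p, i64 2
  %b2 = load i8, ptr %p2, align 1
  %p3 = getelementptr inbounds i8, ptr %p, i64 3
  %b3 = load i8, ptr %p3, align 1
  ; Widen each byte and place it at its position within the i32.
  %z0 = zext i8 %b0 to i32
  %z1 = zext i8 %b1 to i32
  %z2 = zext i8 %b2 to i32
  %z3 = zext i8 %b3 to i32
  %s1 = shl i32 %z1, 8
  %s2 = shl i32 %z2, 16
  %s3 = shl i32 %z3, 24
  ; Combine into one little-endian i32 value.
  %t0 = or i32 %z0, %s1
  %t1 = or i32 %t0, %s2
  %t2 = or i32 %t1, %s3
  ret i32 %t2
}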

