[llvm] [ExpandMemCmp][AArch64][PowerPC][RISCV][X86] Use llvm.ucmp instead of (sub (zext (icmp ugt)), (zext (icmp ult))). (PR #121530)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 2 16:22:16 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-x86
Author: Craig Topper (topperc)
<details>
<summary>Changes</summary>
AArch64 looks like an improvement.
Not sure about PowerPC.
RISC-V is neutral.
X86 trades a dependency-breaking xor before a seta for a movsx after an sbbb. Depending on how the result is used, this movsx might go away.
---
Patch is 31.87 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/121530.diff
18 Files Affected:
- (modified) llvm/lib/CodeGen/ExpandMemCmp.cpp (+3-11)
- (modified) llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll (+1-2)
- (modified) llvm/test/CodeGen/AArch64/memcmp.ll (+5-10)
- (modified) llvm/test/CodeGen/PowerPC/memcmp.ll (+8-10)
- (modified) llvm/test/CodeGen/PowerPC/memcmpIR.ll (+4-12)
- (modified) llvm/test/CodeGen/RISCV/memcmp-optsize.ll (+18-18)
- (modified) llvm/test/CodeGen/RISCV/memcmp.ll (+18-18)
- (modified) llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll (+5-5)
- (modified) llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll (+12-12)
- (modified) llvm/test/CodeGen/X86/memcmp-optsize-x32.ll (+5-5)
- (modified) llvm/test/CodeGen/X86/memcmp-optsize.ll (+12-12)
- (modified) llvm/test/CodeGen/X86/memcmp-pgso-x32.ll (+5-5)
- (modified) llvm/test/CodeGen/X86/memcmp-pgso.ll (+12-12)
- (modified) llvm/test/CodeGen/X86/memcmp-x32.ll (+5-5)
- (modified) llvm/test/CodeGen/X86/memcmp.ll (+12-12)
- (modified) llvm/test/Transforms/ExpandMemCmp/AArch64/memcmp.ll (+5-25)
- (modified) llvm/test/Transforms/ExpandMemCmp/X86/memcmp-x32.ll (+1-5)
- (modified) llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll (+2-10)
``````````diff
diff --git a/llvm/lib/CodeGen/ExpandMemCmp.cpp b/llvm/lib/CodeGen/ExpandMemCmp.cpp
index f8ca7e370f6ef9..6dc3e04ac802c2 100644
--- a/llvm/lib/CodeGen/ExpandMemCmp.cpp
+++ b/llvm/lib/CodeGen/ExpandMemCmp.cpp
@@ -696,17 +696,9 @@ Value *MemCmpExpansion::getMemCmpOneBlock() {
}
}
- // The result of memcmp is negative, zero, or positive, so produce that by
- // subtracting 2 extended compare bits: sub (ugt, ult).
- // If a target prefers to use selects to get -1/0/1, they should be able
- // to transform this later. The inverse transform (going from selects to math)
- // may not be possible in the DAG because the selects got converted into
- // branches before we got there.
- Value *CmpUGT = Builder.CreateICmpUGT(Loads.Lhs, Loads.Rhs);
- Value *CmpULT = Builder.CreateICmpULT(Loads.Lhs, Loads.Rhs);
- Value *ZextUGT = Builder.CreateZExt(CmpUGT, Builder.getInt32Ty());
- Value *ZextULT = Builder.CreateZExt(CmpULT, Builder.getInt32Ty());
- return Builder.CreateSub(ZextUGT, ZextULT);
+ // The result of memcmp is negative, zero, or positive.
+ return Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::ucmp,
+ {Loads.Lhs, Loads.Rhs});
}
// This function expands the memcmp call into an inline expansion and returns
diff --git a/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll b/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll
index 17f8263560430d..a32c53a5a57478 100644
--- a/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll
+++ b/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll
@@ -313,9 +313,8 @@ define void @one_dimensional_with_store(ptr %a, ptr %b, ptr %c, i32 %N) {
; CHECK-NEXT: rev w9, w9
; CHECK-NEXT: cmp w9, w10
; CHECK-NEXT: cset w9, hi
-; CHECK-NEXT: cset w10, lo
+; CHECK-NEXT: csinv w9, w9, wzr, hs
; CHECK-NEXT: subs x8, x8, #1
-; CHECK-NEXT: sub w9, w9, w10
; CHECK-NEXT: strb w9, [x2], #1
; CHECK-NEXT: b.ne .LBB4_1
; CHECK-NEXT: // %bb.2: // %for.exit
diff --git a/llvm/test/CodeGen/AArch64/memcmp.ll b/llvm/test/CodeGen/AArch64/memcmp.ll
index 4da7c8c95a4e4f..4f58fd74d7d508 100644
--- a/llvm/test/CodeGen/AArch64/memcmp.ll
+++ b/llvm/test/CodeGen/AArch64/memcmp.ll
@@ -162,8 +162,7 @@ define i32 @length3(ptr %X, ptr %Y) nounwind {
; CHECK-NEXT: rev w9, w9
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: cset w8, hi
-; CHECK-NEXT: cset w9, lo
-; CHECK-NEXT: sub w0, w8, w9
+; CHECK-NEXT: csinv w0, w8, wzr, hs
; CHECK-NEXT: ret
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind
ret i32 %m
@@ -194,8 +193,7 @@ define i32 @length4(ptr %X, ptr %Y) nounwind {
; CHECK-NEXT: rev w9, w9
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: cset w8, hi
-; CHECK-NEXT: cset w9, lo
-; CHECK-NEXT: sub w0, w8, w9
+; CHECK-NEXT: csinv w0, w8, wzr, hs
; CHECK-NEXT: ret
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
ret i32 %m
@@ -286,8 +284,7 @@ define i32 @length5(ptr %X, ptr %Y) nounwind {
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
; CHECK-NEXT: cset w8, hi
-; CHECK-NEXT: cset w9, lo
-; CHECK-NEXT: sub w0, w8, w9
+; CHECK-NEXT: csinv w0, w8, wzr, hs
; CHECK-NEXT: ret
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind
ret i32 %m
@@ -341,8 +338,7 @@ define i32 @length6(ptr %X, ptr %Y) nounwind {
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
; CHECK-NEXT: cset w8, hi
-; CHECK-NEXT: cset w9, lo
-; CHECK-NEXT: sub w0, w8, w9
+; CHECK-NEXT: csinv w0, w8, wzr, hs
; CHECK-NEXT: ret
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 6) nounwind
ret i32 %m
@@ -450,8 +446,7 @@ define i32 @length8(ptr %X, ptr %Y) nounwind {
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
; CHECK-NEXT: cset w8, hi
-; CHECK-NEXT: cset w9, lo
-; CHECK-NEXT: sub w0, w8, w9
+; CHECK-NEXT: csinv w0, w8, wzr, hs
; CHECK-NEXT: ret
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind
ret i32 %m
diff --git a/llvm/test/CodeGen/PowerPC/memcmp.ll b/llvm/test/CodeGen/PowerPC/memcmp.ll
index 0634534b9c9df1..39f92699973154 100644
--- a/llvm/test/CodeGen/PowerPC/memcmp.ll
+++ b/llvm/test/CodeGen/PowerPC/memcmp.ll
@@ -6,13 +6,12 @@ define signext i32 @memcmp8(ptr nocapture readonly %buffer1, ptr nocapture reado
; CHECK: # %bb.0:
; CHECK-NEXT: ldbrx 3, 0, 3
; CHECK-NEXT: ldbrx 4, 0, 4
-; CHECK-NEXT: subc 5, 4, 3
-; CHECK-NEXT: subfe 5, 4, 4
-; CHECK-NEXT: subc 4, 3, 4
-; CHECK-NEXT: subfe 3, 3, 3
-; CHECK-NEXT: neg 5, 5
+; CHECK-NEXT: cmpld 3, 4
+; CHECK-NEXT: subc 3, 4, 3
+; CHECK-NEXT: subfe 3, 4, 4
+; CHECK-NEXT: li 4, -1
; CHECK-NEXT: neg 3, 3
-; CHECK-NEXT: sub 3, 5, 3
+; CHECK-NEXT: isellt 3, 4, 3
; CHECK-NEXT: extsw 3, 3
; CHECK-NEXT: blr
%call = tail call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 8)
@@ -24,12 +23,11 @@ define signext i32 @memcmp4(ptr nocapture readonly %buffer1, ptr nocapture reado
; CHECK: # %bb.0:
; CHECK-NEXT: lwbrx 3, 0, 3
; CHECK-NEXT: lwbrx 4, 0, 4
+; CHECK-NEXT: cmplw 3, 4
; CHECK-NEXT: sub 5, 4, 3
-; CHECK-NEXT: sub 3, 3, 4
+; CHECK-NEXT: li 3, -1
; CHECK-NEXT: rldicl 5, 5, 1, 63
-; CHECK-NEXT: rldicl 3, 3, 1, 63
-; CHECK-NEXT: sub 3, 5, 3
-; CHECK-NEXT: extsw 3, 3
+; CHECK-NEXT: isellt 3, 3, 5
; CHECK-NEXT: blr
%call = tail call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 4)
ret i32 %call
diff --git a/llvm/test/CodeGen/PowerPC/memcmpIR.ll b/llvm/test/CodeGen/PowerPC/memcmpIR.ll
index 0a8bec7dc0e3f1..b57d2b5116b779 100644
--- a/llvm/test/CodeGen/PowerPC/memcmpIR.ll
+++ b/llvm/test/CodeGen/PowerPC/memcmpIR.ll
@@ -59,22 +59,14 @@ define signext i32 @test2(ptr nocapture readonly %buffer1, ptr nocapture readonl
; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i32, ptr
; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i32 @llvm.bswap.i32(i32 [[LOAD1]])
; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i32 @llvm.bswap.i32(i32 [[LOAD2]])
- ; CHECK-NEXT: [[CMP1:%[0-9]+]] = icmp ugt i32 [[BSWAP1]], [[BSWAP2]]
- ; CHECK-NEXT: [[CMP2:%[0-9]+]] = icmp ult i32 [[BSWAP1]], [[BSWAP2]]
- ; CHECK-NEXT: [[Z1:%[0-9]+]] = zext i1 [[CMP1]] to i32
- ; CHECK-NEXT: [[Z2:%[0-9]+]] = zext i1 [[CMP2]] to i32
- ; CHECK-NEXT: [[SUB:%[0-9]+]] = sub i32 [[Z1]], [[Z2]]
- ; CHECK-NEXT: ret i32 [[SUB]]
+ ; CHECK-NEXT: [[UCMP:%[0-9]+]] = call i32 @llvm.ucmp.i32.i32(i32 [[BSWAP1]], i32 [[BSWAP2]])
+ ; CHECK-NEXT: ret i32 [[UCMP]]
; CHECK-BE-LABEL: @test2(
; CHECK-BE: [[LOAD1:%[0-9]+]] = load i32, ptr
; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i32, ptr
- ; CHECK-BE-NEXT: [[CMP1:%[0-9]+]] = icmp ugt i32 [[LOAD1]], [[LOAD2]]
- ; CHECK-BE-NEXT: [[CMP2:%[0-9]+]] = icmp ult i32 [[LOAD1]], [[LOAD2]]
- ; CHECK-BE-NEXT: [[Z1:%[0-9]+]] = zext i1 [[CMP1]] to i32
- ; CHECK-BE-NEXT: [[Z2:%[0-9]+]] = zext i1 [[CMP2]] to i32
- ; CHECK-BE-NEXT: [[SUB:%[0-9]+]] = sub i32 [[Z1]], [[Z2]]
- ; CHECK-BE-NEXT: ret i32 [[SUB]]
+ ; CHECK-BE-NEXT: [[UCMP:%[0-9]+]] = call i32 @llvm.ucmp.i32.i32(i32 [[LOAD1]], i32 [[LOAD2]])
+ ; CHECK-BE-NEXT: ret i32 [[UCMP]]
entry:
%call = tail call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 4)
diff --git a/llvm/test/CodeGen/RISCV/memcmp-optsize.ll b/llvm/test/CodeGen/RISCV/memcmp-optsize.ll
index d529ae6ecd0aba..0d2acb313bf96d 100644
--- a/llvm/test/CodeGen/RISCV/memcmp-optsize.ll
+++ b/llvm/test/CodeGen/RISCV/memcmp-optsize.ll
@@ -2658,9 +2658,9 @@ define i32 @memcmp_size_4(ptr %s1, ptr %s2) nounwind optsize {
; CHECK-UNALIGNED-RV32-ZBB-NEXT: lw a1, 0(a1)
; CHECK-UNALIGNED-RV32-ZBB-NEXT: rev8 a0, a0
; CHECK-UNALIGNED-RV32-ZBB-NEXT: rev8 a1, a1
-; CHECK-UNALIGNED-RV32-ZBB-NEXT: sltu a2, a1, a0
-; CHECK-UNALIGNED-RV32-ZBB-NEXT: sltu a0, a0, a1
-; CHECK-UNALIGNED-RV32-ZBB-NEXT: sub a0, a2, a0
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: sltu a2, a0, a1
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: sltu a0, a1, a0
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: sub a0, a0, a2
; CHECK-UNALIGNED-RV32-ZBB-NEXT: ret
;
; CHECK-UNALIGNED-RV64-ZBB-LABEL: memcmp_size_4:
@@ -2671,9 +2671,9 @@ define i32 @memcmp_size_4(ptr %s1, ptr %s2) nounwind optsize {
; CHECK-UNALIGNED-RV64-ZBB-NEXT: rev8 a1, a1
; CHECK-UNALIGNED-RV64-ZBB-NEXT: srli a0, a0, 32
; CHECK-UNALIGNED-RV64-ZBB-NEXT: srli a1, a1, 32
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a2, a1, a0
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a0, a0, a1
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: sub a0, a2, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a2, a0, a1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a0, a1, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: sub a0, a0, a2
; CHECK-UNALIGNED-RV64-ZBB-NEXT: ret
;
; CHECK-UNALIGNED-RV32-ZBKB-LABEL: memcmp_size_4:
@@ -2682,9 +2682,9 @@ define i32 @memcmp_size_4(ptr %s1, ptr %s2) nounwind optsize {
; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lw a1, 0(a1)
; CHECK-UNALIGNED-RV32-ZBKB-NEXT: rev8 a0, a0
; CHECK-UNALIGNED-RV32-ZBKB-NEXT: rev8 a1, a1
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT: sltu a2, a1, a0
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT: sltu a0, a0, a1
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT: sub a0, a2, a0
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT: sltu a2, a0, a1
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT: sltu a0, a1, a0
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT: sub a0, a0, a2
; CHECK-UNALIGNED-RV32-ZBKB-NEXT: ret
;
; CHECK-UNALIGNED-RV64-ZBKB-LABEL: memcmp_size_4:
@@ -2695,9 +2695,9 @@ define i32 @memcmp_size_4(ptr %s1, ptr %s2) nounwind optsize {
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: rev8 a1, a1
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: srli a0, a0, 32
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: srli a1, a1, 32
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a2, a1, a0
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a0, a0, a1
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sub a0, a2, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a2, a0, a1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a0, a1, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sub a0, a0, a2
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: ret
;
; CHECK-UNALIGNED-RV32-V-LABEL: memcmp_size_4:
@@ -3500,9 +3500,9 @@ define i32 @memcmp_size_8(ptr %s1, ptr %s2) nounwind optsize {
; CHECK-UNALIGNED-RV64-ZBB-NEXT: ld a1, 0(a1)
; CHECK-UNALIGNED-RV64-ZBB-NEXT: rev8 a0, a0
; CHECK-UNALIGNED-RV64-ZBB-NEXT: rev8 a1, a1
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a2, a1, a0
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a0, a0, a1
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: sub a0, a2, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a2, a0, a1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a0, a1, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: sub a0, a0, a2
; CHECK-UNALIGNED-RV64-ZBB-NEXT: ret
;
; CHECK-UNALIGNED-RV32-ZBKB-LABEL: memcmp_size_8:
@@ -3533,9 +3533,9 @@ define i32 @memcmp_size_8(ptr %s1, ptr %s2) nounwind optsize {
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: ld a1, 0(a1)
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: rev8 a0, a0
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: rev8 a1, a1
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a2, a1, a0
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a0, a0, a1
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sub a0, a2, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a2, a0, a1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a0, a1, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sub a0, a0, a2
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: ret
;
; CHECK-UNALIGNED-RV32-V-LABEL: memcmp_size_8:
diff --git a/llvm/test/CodeGen/RISCV/memcmp.ll b/llvm/test/CodeGen/RISCV/memcmp.ll
index 860c3a94abc0a7..f12da0cda7d2b7 100644
--- a/llvm/test/CodeGen/RISCV/memcmp.ll
+++ b/llvm/test/CodeGen/RISCV/memcmp.ll
@@ -3354,9 +3354,9 @@ define i32 @memcmp_size_4(ptr %s1, ptr %s2) nounwind {
; CHECK-UNALIGNED-RV32-ZBB-NEXT: lw a1, 0(a1)
; CHECK-UNALIGNED-RV32-ZBB-NEXT: rev8 a0, a0
; CHECK-UNALIGNED-RV32-ZBB-NEXT: rev8 a1, a1
-; CHECK-UNALIGNED-RV32-ZBB-NEXT: sltu a2, a1, a0
-; CHECK-UNALIGNED-RV32-ZBB-NEXT: sltu a0, a0, a1
-; CHECK-UNALIGNED-RV32-ZBB-NEXT: sub a0, a2, a0
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: sltu a2, a0, a1
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: sltu a0, a1, a0
+; CHECK-UNALIGNED-RV32-ZBB-NEXT: sub a0, a0, a2
; CHECK-UNALIGNED-RV32-ZBB-NEXT: ret
;
; CHECK-UNALIGNED-RV64-ZBB-LABEL: memcmp_size_4:
@@ -3367,9 +3367,9 @@ define i32 @memcmp_size_4(ptr %s1, ptr %s2) nounwind {
; CHECK-UNALIGNED-RV64-ZBB-NEXT: rev8 a1, a1
; CHECK-UNALIGNED-RV64-ZBB-NEXT: srli a0, a0, 32
; CHECK-UNALIGNED-RV64-ZBB-NEXT: srli a1, a1, 32
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a2, a1, a0
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a0, a0, a1
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: sub a0, a2, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a2, a0, a1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a0, a1, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: sub a0, a0, a2
; CHECK-UNALIGNED-RV64-ZBB-NEXT: ret
;
; CHECK-UNALIGNED-RV32-ZBKB-LABEL: memcmp_size_4:
@@ -3378,9 +3378,9 @@ define i32 @memcmp_size_4(ptr %s1, ptr %s2) nounwind {
; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lw a1, 0(a1)
; CHECK-UNALIGNED-RV32-ZBKB-NEXT: rev8 a0, a0
; CHECK-UNALIGNED-RV32-ZBKB-NEXT: rev8 a1, a1
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT: sltu a2, a1, a0
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT: sltu a0, a0, a1
-; CHECK-UNALIGNED-RV32-ZBKB-NEXT: sub a0, a2, a0
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT: sltu a2, a0, a1
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT: sltu a0, a1, a0
+; CHECK-UNALIGNED-RV32-ZBKB-NEXT: sub a0, a0, a2
; CHECK-UNALIGNED-RV32-ZBKB-NEXT: ret
;
; CHECK-UNALIGNED-RV64-ZBKB-LABEL: memcmp_size_4:
@@ -3391,9 +3391,9 @@ define i32 @memcmp_size_4(ptr %s1, ptr %s2) nounwind {
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: rev8 a1, a1
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: srli a0, a0, 32
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: srli a1, a1, 32
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a2, a1, a0
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a0, a0, a1
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sub a0, a2, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a2, a0, a1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a0, a1, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sub a0, a0, a2
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: ret
;
; CHECK-UNALIGNED-RV32-V-LABEL: memcmp_size_4:
@@ -4196,9 +4196,9 @@ define i32 @memcmp_size_8(ptr %s1, ptr %s2) nounwind {
; CHECK-UNALIGNED-RV64-ZBB-NEXT: ld a1, 0(a1)
; CHECK-UNALIGNED-RV64-ZBB-NEXT: rev8 a0, a0
; CHECK-UNALIGNED-RV64-ZBB-NEXT: rev8 a1, a1
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a2, a1, a0
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a0, a0, a1
-; CHECK-UNALIGNED-RV64-ZBB-NEXT: sub a0, a2, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a2, a0, a1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a0, a1, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT: sub a0, a0, a2
; CHECK-UNALIGNED-RV64-ZBB-NEXT: ret
;
; CHECK-UNALIGNED-RV32-ZBKB-LABEL: memcmp_size_8:
@@ -4229,9 +4229,9 @@ define i32 @memcmp_size_8(ptr %s1, ptr %s2) nounwind {
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: ld a1, 0(a1)
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: rev8 a0, a0
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: rev8 a1, a1
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a2, a1, a0
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a0, a0, a1
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sub a0, a2, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a2, a0, a1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a0, a1, a0
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sub a0, a0, a2
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: ret
;
; CHECK-UNALIGNED-RV32-V-LABEL: memcmp_size_8:
diff --git a/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll b/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll
index ee5fd78c643793..62935f7e372b3a 100644
--- a/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll
+++ b/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll
@@ -193,13 +193,13 @@ define i32 @length4(ptr %X, ptr %Y) nounwind {
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl (%ecx), %ecx
-; X86-NEXT: movl (%eax), %edx
+; X86-NEXT: movl (%eax), %eax
; X86-NEXT: bswapl %ecx
-; X86-NEXT: bswapl %edx
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpl %edx, %ecx
+; X86-NEXT: bswapl %eax
+; X86-NEXT: cmpl %eax, %ecx
; X86-NEXT: seta %al
-; X86-NEXT: sbbl $0, %eax
+; X86-NEXT: sbbb $0, %al
+; X86-NEXT: movsbl %al, %eax
; X86-NEXT: retl
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind
ret i32 %m
diff --git a/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll b/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll
index a46f9ed3d3798d..9bbd335a903be9 100644
--- a/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll
+++ b/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll
@@ -179,14 +179,14 @@ define i1 @length3_eq(ptr %X, ptr %Y) nounwind {
define i32 @length4(ptr %X, ptr %Y) nounwind {
; X64-LABEL: length4:
; X64: # %bb.0:
-; X64-NEXT: movl (%rdi), %ecx
-; X64-NEXT: movl (%rsi), %edx
+; X64-NEXT: movl (%rdi), %eax
+; X64-NEXT: movl (%rsi), %ecx
+; X64-NEXT: bswapl %eax
; X64-NEXT: bswapl %ecx
-; X64-NEXT: bswapl %edx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpl %edx, %ecx
+; X64-NEXT: cmpl %ecx, %eax
; X64-NEXT: seta %al
-; X64-NEXT: sbbl $0, %eax
+; X64-NEXT: sbbb $0, %al
+; X64-NEXT: movsbl %al, %eax
; X64-NEXT: retq
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
ret i32 %m
@@ -391,14 +391,14 @@ define i1 @length7_lt(ptr %X, ptr %Y) nounwind {
define i32 @length8(ptr %X, ptr %Y) nounwind {
; X64-LABEL: length8:
; X64: # %bb.0:
-; X64-NEXT: movq (%rdi), %rcx
-; X64-NEXT: movq (%rsi), %rdx
+; X64-NEXT: movq (%rdi), %rax
+; X64-NEXT: movq (%rsi), %rcx
+; X64-NEXT: bswapq %rax
; X64-NEXT: bswapq %rcx
-; X64-NEXT: bswapq %rdx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpq %rdx, %rcx
+; X64-NEXT: cmpq %rcx, %rax
; X64-NEXT: seta %al
-; X64-NEXT: sbbl $0, %eax
+; X64-NEXT: sbbb $0, %al
+; X64-NEXT: movsbl %al, %eax
; X64-NEXT: retq
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind
ret i32 %m
diff --git a/llvm/test/CodeGen/X86/memcmp-optsize-x32.ll b/llvm/test/CodeGen/X86/memcmp-optsize-x32.ll
index 4a9643c0f4fc89..3a16ab656b11fa 100644
--- a/llvm/test/CodeGen/X86/memcmp-optsize-x32.ll
+++ b/llvm/test/CodeGen/X86/memcmp-optsize-x32.ll
@@ -122,13 +122,13 @@ define i32 @length4(ptr %X, ptr %Y) nounwind optsize {
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl (%ecx), %ecx
-; X86-NEXT: movl (%eax), %edx
+; X86-NEXT: movl (%eax), %eax
; X86-NEXT: bswapl %ecx
-; X86-NEXT: bswapl %edx
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpl %edx, %ecx
+; X86-NEXT: bswapl %eax
+; X86-NEXT: cmpl %eax, %ecx
; X86-NEXT: seta %al
-; X86-NEXT: sbbl $0, %eax
+; X86-NEXT: sbbb $0, %al
+; X86-NEXT: movsbl %al, %eax
; X86-NEXT: retl
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 4) nounwind
ret i32 %m
diff --git a/llvm/test/CodeGen/X86/memcmp-optsize.ll b/llvm/test/CodeGen/X86/memcmp-optsize.ll
index 4e27301436c344..0f817b2c727c33 100644
--- a/llvm/test/CodeGen/X86/memcmp-optsize.ll
+++ b/llvm/test/CodeGen/X86/memcmp-optsize.ll
@@ -107,14 +107,14 @@ define i1 @length3_eq(ptr %X, ptr %Y) nounwind optsize {
define i32 @length4(ptr %X, ptr %Y) nounwind optsize {
; X64-LABEL: length4:
; X64: # %bb.0:
-; X64-NEXT: movl (%rdi), %ecx
-; X64-NEXT: movl (%rsi), %edx
+; X64-NEXT: movl (%rdi), %eax
+; X64-NEXT: movl (%rsi), %ecx
+; X64-NEXT: bswapl %eax
; X64-NEXT: bswapl %ecx
-; X64-NEXT: bswapl %edx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpl %edx, %ecx
+; X64-NEXT: cmpl %ecx, %eax
; X64-NEXT: seta %al
-; X64-NEXT: sbbl $0, %eax
+; X64-NEXT: sbbb $0, %al
+; X64-NEXT: movsbl %al, %eax
; X64-NEXT: retq
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
ret i32 %m
@@ -186,14 +186,14 @@ define i1 @length5_eq(ptr %X, ptr %Y) nounwind optsize {
define i32 @length8(ptr %X, ptr %Y) nounwind optsize {
; X64-LABEL: length8:
; X64: # %bb.0:
-; X64-NEXT: movq (%rdi), %rcx
-; X64-NEXT: movq (%rsi), %rdx
+; X64-NEXT: movq (%rdi), %rax
+; X64-NEXT: movq (%rsi), %rcx
+; X64-NEXT: bswapq %rax
; X64-NEXT: bswapq %rcx
-; X64-NEXT: bswapq %rdx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpq %rdx, %rcx
+; X64-NEXT: cmpq %rcx, %rax
; X64-NEXT: seta %al
-; X64-NEXT: sbbl $0, %eax
+; X64-NEXT: sbbb $0, %al
+; X64-NEXT: movsbl %al, %eax
; X64-NEXT: retq
%m = t...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/121530
More information about the llvm-commits mailing list