[llvm] [ExpandMemCmp] Optimize ExpandMemCmp to reduce instruction count on x86 (PR #69609)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 19 08:45:42 PDT 2023
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Igor Kirillov (igogo-x86)
<details>
<summary>Changes</summary>
Reordered the operations in MemCmpExpansion::getLoadPair so that loaded values are zero-extended before they are byte-swapped (previously the byte swap was performed first). This change enables the generation of fewer instructions for x86, thereby improving code efficiency.
---
Patch is 132.89 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/69609.diff
11 Files Affected:
- (modified) llvm/lib/CodeGen/ExpandMemCmp.cpp (+16-15)
- (modified) llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll (+10-16)
- (modified) llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll (+8-14)
- (modified) llvm/test/CodeGen/X86/memcmp-optsize-x32.ll (+4-6)
- (modified) llvm/test/CodeGen/X86/memcmp-optsize.ll (+4-6)
- (modified) llvm/test/CodeGen/X86/memcmp-pgso-x32.ll (+4-6)
- (modified) llvm/test/CodeGen/X86/memcmp-pgso.ll (+4-6)
- (modified) llvm/test/CodeGen/X86/memcmp-x32.ll (+14-22)
- (modified) llvm/test/CodeGen/X86/memcmp.ll (+12-20)
- (modified) llvm/test/Transforms/ExpandMemCmp/X86/memcmp-x32.ll (+196-196)
- (modified) llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll (+529-529)
``````````diff
diff --git a/llvm/lib/CodeGen/ExpandMemCmp.cpp b/llvm/lib/CodeGen/ExpandMemCmp.cpp
index 911ebd41afc5b91..40fbe877ab7ac09 100644
--- a/llvm/lib/CodeGen/ExpandMemCmp.cpp
+++ b/llvm/lib/CodeGen/ExpandMemCmp.cpp
@@ -307,19 +307,20 @@ MemCmpExpansion::LoadPair MemCmpExpansion::getLoadPair(Type *LoadSizeType,
if (!Rhs)
Rhs = Builder.CreateAlignedLoad(LoadSizeType, RhsSource, RhsAlign);
+ // Zero extend if required.
+ if (CmpSizeType != nullptr && CmpSizeType != LoadSizeType) {
+ Lhs = Builder.CreateZExt(Lhs, CmpSizeType);
+ Rhs = Builder.CreateZExt(Rhs, CmpSizeType);
+ }
+
// Swap bytes if required.
if (NeedsBSwap) {
+ Type *BSwapType = CmpSizeType ? CmpSizeType : LoadSizeType;
Function *Bswap = Intrinsic::getDeclaration(CI->getModule(),
- Intrinsic::bswap, LoadSizeType);
+ Intrinsic::bswap, BSwapType);
Lhs = Builder.CreateCall(Bswap, Lhs);
Rhs = Builder.CreateCall(Bswap, Rhs);
}
-
- // Zero extend if required.
- if (CmpSizeType != nullptr && CmpSizeType != LoadSizeType) {
- Lhs = Builder.CreateZExt(Lhs, CmpSizeType);
- Rhs = Builder.CreateZExt(Rhs, CmpSizeType);
- }
return {Lhs, Rhs};
}
@@ -694,10 +695,10 @@ Value *MemCmpExpansion::getMemCmpExpansion() {
/// %17 = getelementptr i32, i32* %15, i32 2
/// %18 = load i32, i32* %16
/// %19 = load i32, i32* %17
-/// %20 = call i32 @llvm.bswap.i32(i32 %18)
-/// %21 = call i32 @llvm.bswap.i32(i32 %19)
-/// %22 = zext i32 %20 to i64
-/// %23 = zext i32 %21 to i64
+/// %20 = zext i32 %18 to i64
+/// %21 = zext i32 %19 to i64
+/// %22 = call i64 @llvm.bswap.i64(i64 %20)
+/// %23 = call i64 @llvm.bswap.i64(i64 %21)
/// %24 = sub i64 %22, %23
/// %25 = icmp ne i64 %24, 0
/// br i1 %25, label %res_block, label %loadbb2
@@ -710,10 +711,10 @@ Value *MemCmpExpansion::getMemCmpExpansion() {
/// %31 = getelementptr i16, i16* %29, i16 6
/// %32 = load i16, i16* %30
/// %33 = load i16, i16* %31
-/// %34 = call i16 @llvm.bswap.i16(i16 %32)
-/// %35 = call i16 @llvm.bswap.i16(i16 %33)
-/// %36 = zext i16 %34 to i64
-/// %37 = zext i16 %35 to i64
+/// %34 = zext i16 %32 to i64
+/// %35 = zext i16 %33 to i64
+/// %36 = call i64 @llvm.bswap.i64(i64 %34)
+/// %37 = call i64 @llvm.bswap.i64(i64 %35)
/// %38 = sub i64 %36, %37
/// %39 = icmp ne i64 %38, 0
/// br i1 %39, label %res_block, label %loadbb3
diff --git a/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll b/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll
index c0f8f86e6e8b107..a89571656e46951 100644
--- a/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll
+++ b/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll
@@ -44,14 +44,12 @@ define i1 @length0_lt(ptr %X, ptr %Y) nounwind {
define i32 @length2(ptr %X, ptr %Y) nounwind {
; X86-LABEL: length2:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movzwl (%eax), %eax
; X86-NEXT: movzwl (%ecx), %ecx
-; X86-NEXT: movzwl (%eax), %edx
-; X86-NEXT: rolw $8, %cx
-; X86-NEXT: rolw $8, %dx
-; X86-NEXT: movzwl %cx, %eax
-; X86-NEXT: movzwl %dx, %ecx
+; X86-NEXT: bswapl %eax
+; X86-NEXT: bswapl %ecx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: retl
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind
@@ -75,14 +73,12 @@ define i1 @length2_eq(ptr %X, ptr %Y) nounwind {
define i1 @length2_lt(ptr %X, ptr %Y) nounwind {
; X86-LABEL: length2_lt:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movzwl (%eax), %eax
; X86-NEXT: movzwl (%ecx), %ecx
-; X86-NEXT: movzwl (%eax), %edx
-; X86-NEXT: rolw $8, %cx
-; X86-NEXT: rolw $8, %dx
-; X86-NEXT: movzwl %cx, %eax
-; X86-NEXT: movzwl %dx, %ecx
+; X86-NEXT: bswapl %eax
+; X86-NEXT: bswapl %ecx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: shrl $31, %eax
; X86-NEXT: # kill: def $al killed $al killed $eax
@@ -99,10 +95,8 @@ define i1 @length2_gt(ptr %X, ptr %Y) nounwind {
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzwl (%ecx), %ecx
; X86-NEXT: movzwl (%eax), %eax
-; X86-NEXT: rolw $8, %cx
-; X86-NEXT: rolw $8, %ax
-; X86-NEXT: movzwl %cx, %ecx
-; X86-NEXT: movzwl %ax, %eax
+; X86-NEXT: bswapl %ecx
+; X86-NEXT: bswapl %eax
; X86-NEXT: subl %eax, %ecx
; X86-NEXT: testl %ecx, %ecx
; X86-NEXT: setg %al
diff --git a/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll b/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll
index 56d06021867fa15..1f07ba39ecef9fe 100644
--- a/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll
+++ b/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll
@@ -52,10 +52,8 @@ define i32 @length2(ptr %X, ptr %Y) nounwind {
; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzwl (%rsi), %ecx
-; X64-NEXT: rolw $8, %ax
-; X64-NEXT: rolw $8, %cx
-; X64-NEXT: movzwl %ax, %eax
-; X64-NEXT: movzwl %cx, %ecx
+; X64-NEXT: bswapl %eax
+; X64-NEXT: bswapl %ecx
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind
@@ -79,10 +77,8 @@ define i1 @length2_lt(ptr %X, ptr %Y) nounwind {
; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzwl (%rsi), %ecx
-; X64-NEXT: rolw $8, %ax
-; X64-NEXT: rolw $8, %cx
-; X64-NEXT: movzwl %ax, %eax
-; X64-NEXT: movzwl %cx, %ecx
+; X64-NEXT: bswapl %eax
+; X64-NEXT: bswapl %ecx
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: shrl $31, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
@@ -97,10 +93,8 @@ define i1 @length2_gt(ptr %X, ptr %Y) nounwind {
; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzwl (%rsi), %ecx
-; X64-NEXT: rolw $8, %ax
-; X64-NEXT: rolw $8, %cx
-; X64-NEXT: movzwl %ax, %eax
-; X64-NEXT: movzwl %cx, %ecx
+; X64-NEXT: bswapl %eax
+; X64-NEXT: bswapl %ecx
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: testl %eax, %eax
; X64-NEXT: setg %al
@@ -511,8 +505,8 @@ define i32 @length12(ptr %X, ptr %Y) nounwind {
; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: movl 8(%rdi), %ecx
; X64-NEXT: movl 8(%rsi), %edx
-; X64-NEXT: bswapl %ecx
-; X64-NEXT: bswapl %edx
+; X64-NEXT: bswapq %rcx
+; X64-NEXT: bswapq %rdx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: je .LBB29_3
diff --git a/llvm/test/CodeGen/X86/memcmp-optsize-x32.ll b/llvm/test/CodeGen/X86/memcmp-optsize-x32.ll
index 762691151f4bd3b..8efd4fca91a9972 100644
--- a/llvm/test/CodeGen/X86/memcmp-optsize-x32.ll
+++ b/llvm/test/CodeGen/X86/memcmp-optsize-x32.ll
@@ -13,14 +13,12 @@ declare dso_local i32 @bcmp(ptr, ptr, i32)
define i32 @length2(ptr %X, ptr %Y) nounwind optsize {
; X86-LABEL: length2:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movzwl (%eax), %eax
; X86-NEXT: movzwl (%ecx), %ecx
-; X86-NEXT: movzwl (%eax), %edx
-; X86-NEXT: rolw $8, %cx
-; X86-NEXT: rolw $8, %dx
-; X86-NEXT: movzwl %cx, %eax
-; X86-NEXT: movzwl %dx, %ecx
+; X86-NEXT: bswapl %eax
+; X86-NEXT: bswapl %ecx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: retl
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind
diff --git a/llvm/test/CodeGen/X86/memcmp-optsize.ll b/llvm/test/CodeGen/X86/memcmp-optsize.ll
index c0c7b98d471cd46..a8df0ac1354f893 100644
--- a/llvm/test/CodeGen/X86/memcmp-optsize.ll
+++ b/llvm/test/CodeGen/X86/memcmp-optsize.ll
@@ -16,10 +16,8 @@ define i32 @length2(ptr %X, ptr %Y) nounwind optsize {
; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzwl (%rsi), %ecx
-; X64-NEXT: rolw $8, %ax
-; X64-NEXT: rolw $8, %cx
-; X64-NEXT: movzwl %ax, %eax
-; X64-NEXT: movzwl %cx, %ecx
+; X64-NEXT: bswapl %eax
+; X64-NEXT: bswapl %ecx
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind
@@ -251,8 +249,8 @@ define i32 @length12(ptr %X, ptr %Y) nounwind optsize {
; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: movl 8(%rdi), %ecx
; X64-NEXT: movl 8(%rsi), %edx
-; X64-NEXT: bswapl %ecx
-; X64-NEXT: bswapl %edx
+; X64-NEXT: bswapq %rcx
+; X64-NEXT: bswapq %rdx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: je .LBB15_3
diff --git a/llvm/test/CodeGen/X86/memcmp-pgso-x32.ll b/llvm/test/CodeGen/X86/memcmp-pgso-x32.ll
index cb45fd3ebb9068c..b486eebd54b4a37 100644
--- a/llvm/test/CodeGen/X86/memcmp-pgso-x32.ll
+++ b/llvm/test/CodeGen/X86/memcmp-pgso-x32.ll
@@ -13,14 +13,12 @@ declare dso_local i32 @bcmp(ptr, ptr, i32)
define i32 @length2(ptr %X, ptr %Y) nounwind !prof !14 {
; X86-LABEL: length2:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movzwl (%eax), %eax
; X86-NEXT: movzwl (%ecx), %ecx
-; X86-NEXT: movzwl (%eax), %edx
-; X86-NEXT: rolw $8, %cx
-; X86-NEXT: rolw $8, %dx
-; X86-NEXT: movzwl %cx, %eax
-; X86-NEXT: movzwl %dx, %ecx
+; X86-NEXT: bswapl %eax
+; X86-NEXT: bswapl %ecx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: retl
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind
diff --git a/llvm/test/CodeGen/X86/memcmp-pgso.ll b/llvm/test/CodeGen/X86/memcmp-pgso.ll
index 720344a22e43b5c..afb57c8101b8221 100644
--- a/llvm/test/CodeGen/X86/memcmp-pgso.ll
+++ b/llvm/test/CodeGen/X86/memcmp-pgso.ll
@@ -16,10 +16,8 @@ define i32 @length2(ptr %X, ptr %Y) nounwind !prof !14 {
; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzwl (%rsi), %ecx
-; X64-NEXT: rolw $8, %ax
-; X64-NEXT: rolw $8, %cx
-; X64-NEXT: movzwl %ax, %eax
-; X64-NEXT: movzwl %cx, %ecx
+; X64-NEXT: bswapl %eax
+; X64-NEXT: bswapl %ecx
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind
@@ -251,8 +249,8 @@ define i32 @length12(ptr %X, ptr %Y) nounwind !prof !14 {
; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: movl 8(%rdi), %ecx
; X64-NEXT: movl 8(%rsi), %edx
-; X64-NEXT: bswapl %ecx
-; X64-NEXT: bswapl %edx
+; X64-NEXT: bswapq %rcx
+; X64-NEXT: bswapq %rdx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: je .LBB15_3
diff --git a/llvm/test/CodeGen/X86/memcmp-x32.ll b/llvm/test/CodeGen/X86/memcmp-x32.ll
index ab439b32f2f1b20..f5b67ab45f7255c 100644
--- a/llvm/test/CodeGen/X86/memcmp-x32.ll
+++ b/llvm/test/CodeGen/X86/memcmp-x32.ll
@@ -43,14 +43,12 @@ define i1 @length0_lt(ptr %X, ptr %Y) nounwind {
define i32 @length2(ptr %X, ptr %Y) nounwind {
; X86-LABEL: length2:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movzwl (%eax), %eax
; X86-NEXT: movzwl (%ecx), %ecx
-; X86-NEXT: movzwl (%eax), %edx
-; X86-NEXT: rolw $8, %cx
-; X86-NEXT: rolw $8, %dx
-; X86-NEXT: movzwl %cx, %eax
-; X86-NEXT: movzwl %dx, %ecx
+; X86-NEXT: bswapl %eax
+; X86-NEXT: bswapl %ecx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: retl
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind
@@ -62,9 +60,8 @@ define i32 @length2_const(ptr %X, ptr %Y) nounwind {
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl (%eax), %eax
-; X86-NEXT: rolw $8, %ax
-; X86-NEXT: movzwl %ax, %eax
-; X86-NEXT: addl $-12594, %eax # imm = 0xCECE
+; X86-NEXT: bswapl %eax
+; X86-NEXT: addl $-825360384, %eax # imm = 0xCECE0000
; X86-NEXT: retl
%m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i32 2) nounwind
ret i32 %m
@@ -75,9 +72,8 @@ define i1 @length2_gt_const(ptr %X, ptr %Y) nounwind {
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl (%eax), %eax
-; X86-NEXT: rolw $8, %ax
-; X86-NEXT: movzwl %ax, %eax
-; X86-NEXT: addl $-12594, %eax # imm = 0xCECE
+; X86-NEXT: bswapl %eax
+; X86-NEXT: addl $-825360384, %eax # imm = 0xCECE0000
; X86-NEXT: testl %eax, %eax
; X86-NEXT: setg %al
; X86-NEXT: retl
@@ -103,14 +99,12 @@ define i1 @length2_eq(ptr %X, ptr %Y) nounwind {
define i1 @length2_lt(ptr %X, ptr %Y) nounwind {
; X86-LABEL: length2_lt:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movzwl (%eax), %eax
; X86-NEXT: movzwl (%ecx), %ecx
-; X86-NEXT: movzwl (%eax), %edx
-; X86-NEXT: rolw $8, %cx
-; X86-NEXT: rolw $8, %dx
-; X86-NEXT: movzwl %cx, %eax
-; X86-NEXT: movzwl %dx, %ecx
+; X86-NEXT: bswapl %eax
+; X86-NEXT: bswapl %ecx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: shrl $31, %eax
; X86-NEXT: # kill: def $al killed $al killed $eax
@@ -127,10 +121,8 @@ define i1 @length2_gt(ptr %X, ptr %Y) nounwind {
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzwl (%ecx), %ecx
; X86-NEXT: movzwl (%eax), %eax
-; X86-NEXT: rolw $8, %cx
-; X86-NEXT: rolw $8, %ax
-; X86-NEXT: movzwl %cx, %ecx
-; X86-NEXT: movzwl %ax, %eax
+; X86-NEXT: bswapl %ecx
+; X86-NEXT: bswapl %eax
; X86-NEXT: subl %eax, %ecx
; X86-NEXT: testl %ecx, %ecx
; X86-NEXT: setg %al
diff --git a/llvm/test/CodeGen/X86/memcmp.ll b/llvm/test/CodeGen/X86/memcmp.ll
index 1330f3a241a5c2a..b8c0f509f1d081b 100644
--- a/llvm/test/CodeGen/X86/memcmp.ll
+++ b/llvm/test/CodeGen/X86/memcmp.ll
@@ -51,10 +51,8 @@ define i32 @length2(ptr %X, ptr %Y) nounwind {
; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzwl (%rsi), %ecx
-; X64-NEXT: rolw $8, %ax
-; X64-NEXT: rolw $8, %cx
-; X64-NEXT: movzwl %ax, %eax
-; X64-NEXT: movzwl %cx, %ecx
+; X64-NEXT: bswapl %eax
+; X64-NEXT: bswapl %ecx
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind
@@ -65,9 +63,8 @@ define i32 @length2_const(ptr %X, ptr %Y) nounwind {
; X64-LABEL: length2_const:
; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax
-; X64-NEXT: rolw $8, %ax
-; X64-NEXT: movzwl %ax, %eax
-; X64-NEXT: addl $-12594, %eax # imm = 0xCECE
+; X64-NEXT: bswapl %eax
+; X64-NEXT: addl $-825360384, %eax # imm = 0xCECE0000
; X64-NEXT: retq
%m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 2) nounwind
ret i32 %m
@@ -77,9 +74,8 @@ define i1 @length2_gt_const(ptr %X, ptr %Y) nounwind {
; X64-LABEL: length2_gt_const:
; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax
-; X64-NEXT: rolw $8, %ax
-; X64-NEXT: movzwl %ax, %eax
-; X64-NEXT: addl $-12594, %eax # imm = 0xCECE
+; X64-NEXT: bswapl %eax
+; X64-NEXT: addl $-825360384, %eax # imm = 0xCECE0000
; X64-NEXT: testl %eax, %eax
; X64-NEXT: setg %al
; X64-NEXT: retq
@@ -105,10 +101,8 @@ define i1 @length2_lt(ptr %X, ptr %Y) nounwind {
; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzwl (%rsi), %ecx
-; X64-NEXT: rolw $8, %ax
-; X64-NEXT: rolw $8, %cx
-; X64-NEXT: movzwl %ax, %eax
-; X64-NEXT: movzwl %cx, %ecx
+; X64-NEXT: bswapl %eax
+; X64-NEXT: bswapl %ecx
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: shrl $31, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
@@ -123,10 +117,8 @@ define i1 @length2_gt(ptr %X, ptr %Y) nounwind {
; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzwl (%rsi), %ecx
-; X64-NEXT: rolw $8, %ax
-; X64-NEXT: rolw $8, %cx
-; X64-NEXT: movzwl %ax, %eax
-; X64-NEXT: movzwl %cx, %ecx
+; X64-NEXT: bswapl %eax
+; X64-NEXT: bswapl %ecx
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: testl %eax, %eax
; X64-NEXT: setg %al
@@ -537,8 +529,8 @@ define i32 @length12(ptr %X, ptr %Y) nounwind {
; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: movl 8(%rdi), %ecx
; X64-NEXT: movl 8(%rsi), %edx
-; X64-NEXT: bswapl %ecx
-; X64-NEXT: bswapl %edx
+; X64-NEXT: bswapq %rcx
+; X64-NEXT: bswapq %rdx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: je .LBB31_3
diff --git a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-x32.ll b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-x32.ll
index f56d9688a01e12d..bd42d5f8d50859d 100644
--- a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-x32.ll
+++ b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-x32.ll
@@ -5,14 +5,14 @@ declare i32 @memcmp(ptr nocapture, ptr nocapture, i32)
define i32 @cmp2(ptr nocapture readonly %x, ptr nocapture readonly %y) {
; X32-LABEL: @cmp2(
-; X32-NEXT: [[TMP3:%.*]] = load i16, ptr [[X:%.*]], align 1
-; X32-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y:%.*]], align 1
-; X32-NEXT: [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
-; X32-NEXT: [[TMP6:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
-; X32-NEXT: [[TMP7:%.*]] = zext i16 [[TMP5]] to i32
-; X32-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i32
-; X32-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
-; X32-NEXT: ret i32 [[TMP9]]
+; X32-NEXT: [[TMP1:%.*]] = load i16, ptr [[X:%.*]], align 1
+; X32-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y:%.*]], align 1
+; X32-NEXT: [[TMP3:%.*]] = zext i16 [[TMP1]] to i32
+; X32-NEXT: [[TMP4:%.*]] = zext i16 [[TMP2]] to i32
+; X32-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X32-NEXT: [[TMP6:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X32-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X32-NEXT: ret i32 [[TMP7]]
;
%call = tail call i32 @memcmp(ptr %x, ptr %y, i32 2)
ret i32 %call
@@ -20,14 +20,14 @@ define i32 @cmp2(ptr nocapture readonly %x, ptr nocapture readonly %y) {
define i32 @cmp2_align2(ptr nocapture readonly align 2 %x, ptr nocapture readonly align 2 %y) {
; X32-LABEL: @cmp2_align2(
-; X32-NEXT: [[TMP3:%.*]] = load i16, ptr [[X:%.*]], align 2
-; X32-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y:%.*]], align 2
-; X32-NEXT: [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
-; X32-NEXT: [[TMP6:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
-; X32-NEXT: [[TMP7:%.*]] = zext i16 [[TMP5]] to i32
-; X32-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i32
-; X32-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
-; X32-NEXT: ret i32 [[TMP9]]
+; X32-NEXT: [[TMP1:%.*]] = load i16, ptr [[X:%.*]], align 2
+; X32-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y:%.*]], align 2
+; X32-NEXT: [[TMP3:%.*]] = zext i16 [[TMP1]] to i32
+; X32-NEXT: [[TMP4:%.*]] = zext i16 [[TMP2]] to i32
+; X32-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X32-NEXT: [[TMP6:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X32-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+; X32-NEXT: ret i32 [[TMP7]]
;
%call = tail call i32 @memcmp(ptr %x, ptr %y, i32 2)
ret i32 %call
@@ -37,27 +37,27 @@ define i32 @cmp3(ptr nocapture readonly %x, ptr nocapture readonly %y) {
; X32-LABEL: @cmp3(
; X32-NEXT: br label [[LOADBB:%.*]]
; X32: res_block:
-; X32-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP7:%.*]], [[TMP8:%.*]]
+; X32-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]]
; X32-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
; X32-NEXT: br label [[ENDBLOCK:%.*]]
; X32: loadbb:
-; X32-NEXT: [[TMP5:%.*]] = load i16, ptr [[X:%.*]], align 1
-; X32-NEXT: [[TMP6:%.*]] = load i16, ptr [[Y:%.*]], align 1
-; X32-NEXT: [[TMP7]] = call i16 @llvm.bswap.i16(i16 [[TMP5]])
-; X32-NEXT: [[TMP8]] = call i16 @llvm.bswap.i16(i16 [[TMP6]])
-; X32-NEXT: [[TMP9:%.*]] = icmp eq i16 [[TMP7]], [[TMP8]]
-; X32-NEXT: br i1 [[TMP9]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X32-NEXT: [[TMP3:%.*]] = load i16, ptr [[X:%.*]], align 1
+; X32-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y:%.*]], align 1
+; X32-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
+; X32-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
+; X32-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]]
+; X32-NEXT: br i1 [[TMP...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/69609
More information about the llvm-commits
mailing list