[llvm] b4c4013 - [X86] narrowBitOpRMW - peek through bitcasts while searching for RMW patterns (#167497)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 11 04:58:20 PST 2025
Author: Simon Pilgrim
Date: 2025-11-11T12:58:16Z
New Revision: b4c40130cc6416c63513752b77ef720d2c832a11
URL: https://github.com/llvm/llvm-project/commit/b4c40130cc6416c63513752b77ef720d2c832a11
DIFF: https://github.com/llvm/llvm-project/commit/b4c40130cc6416c63513752b77ef720d2c832a11.diff
LOG: [X86] narrowBitOpRMW - peek through bitcasts while searching for RMW patterns (#167497)
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/bittest-big-integer.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 5bce539c45341..fa3dce256046f 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -53412,7 +53412,7 @@ static SDValue narrowBitOpRMW(StoreSDNode *St, const SDLoc &DL,
return SDValue();
// SrcVal must be a matching normal load further up the chain.
- auto *Ld = dyn_cast<LoadSDNode>(SrcVal);
+ auto *Ld = dyn_cast<LoadSDNode>(peekThroughBitcasts(SrcVal));
if (!Ld || !ISD::isNormalLoad(Ld) || !Ld->isSimple() ||
Ld->getBasePtr() != St->getBasePtr() ||
Ld->getOffset() != St->getOffset() ||
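For reference, narrowBitOpRMW looks for a store of a wide integer in which a single bit has been changed (e.g. xor with a shifted 1), where the wide source value is a normal load from the same address; with this one-line change the search for that load also peeks through bitcasts, e.g. a vector load that is bitcast to i128 before the bit is flipped. Below is a minimal IR sketch of the shape that now matches (hypothetical function and value names; the committed coverage is complement_ne_i128_bitcast in bittest-big-integer.ll):

    define void @bitcast_rmw_sketch(ptr %word, i32 %position) {
      ; the 128-bit value is loaded as a vector and bitcast to an integer
      %ld = load <8 x i16>, ptr %word
      %val = bitcast <8 x i16> %ld to i128
      ; build a single-bit mask from the masked position
      %rem = and i32 %position, 127
      %ofs = zext nneg i32 %rem to i128
      %bit = shl nuw i128 1, %ofs
      ; flip that bit and store the whole value back through the same pointer
      %res = xor i128 %val, %bit
      store i128 %res, ptr %word
      ret void
    }

With the load now found through the bitcast, the full-width i128 read-modify-write can be narrowed to a 32-bit update of just the affected dword, as the btcl plus 4-byte store in the updated CHECK lines below shows.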
diff --git a/llvm/test/CodeGen/X86/bittest-big-integer.ll b/llvm/test/CodeGen/X86/bittest-big-integer.ll
index e9e9ee9c97593..9b7569ff8b29f 100644
--- a/llvm/test/CodeGen/X86/bittest-big-integer.ll
+++ b/llvm/test/CodeGen/X86/bittest-big-integer.ll
@@ -967,82 +967,63 @@ define <8 x i16> @complement_ne_i128_bitcast(ptr %word, i32 %position) nounwind
; X86-NEXT: pushl %esi
; X86-NEXT: andl $-16, %esp
; X86-NEXT: subl $80, %esp
-; X86-NEXT: movzbl 16(%ebp), %ecx
-; X86-NEXT: movl 12(%ebp), %edx
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $1, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: shrb $3, %al
-; X86-NEXT: andb $12, %al
-; X86-NEXT: negb %al
-; X86-NEXT: movsbl %al, %eax
-; X86-NEXT: movl 56(%esp,%eax), %esi
-; X86-NEXT: movl 60(%esp,%eax), %ebx
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: shldl %cl, %esi, %ebx
-; X86-NEXT: movzwl 14(%edx), %edi
+; X86-NEXT: movl 12(%ebp), %eax
+; X86-NEXT: movzwl (%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movzwl 12(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movzwl 14(%eax), %edi
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: shll $16, %edi
-; X86-NEXT: movzwl 12(%edx), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: orl %ecx, %edi
-; X86-NEXT: xorl %ebx, %edi
-; X86-NEXT: movl 52(%esp,%eax), %edx
-; X86-NEXT: movzbl 16(%ebp), %ecx
-; X86-NEXT: shldl %cl, %edx, %esi
-; X86-NEXT: movl 12(%ebp), %eax
-; X86-NEXT: movzwl 10(%eax), %ebx
-; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: shll $16, %ebx
-; X86-NEXT: movzwl 8(%eax), %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: orl %eax, %ebx
-; X86-NEXT: xorl %esi, %ebx
-; X86-NEXT: movl (%esp), %eax # 4-byte Reload
-; X86-NEXT: movl 48(%esp,%eax), %esi
-; X86-NEXT: shldl %cl, %esi, %edx
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movzwl 6(%ecx), %eax
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: shll $16, %eax
-; X86-NEXT: movzwl 4(%ecx), %ecx
+; X86-NEXT: movzwl 2(%eax), %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: orl %ecx, %eax
-; X86-NEXT: xorl %edx, %eax
-; X86-NEXT: movzbl 16(%ebp), %ecx
-; X86-NEXT: shll %cl, %esi
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movzwl 2(%ecx), %edx
+; X86-NEXT: movzwl 4(%eax), %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: shll $16, %edx
-; X86-NEXT: movzwl (%ecx), %ecx
+; X86-NEXT: movzwl 6(%eax), %esi
+; X86-NEXT: movzwl 8(%eax), %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: orl %ecx, %edx
-; X86-NEXT: xorl %esi, %edx
+; X86-NEXT: movzwl 10(%eax), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: shll $16, %eax
+; X86-NEXT: orl %ecx, %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: shll $16, %eax
+; X86-NEXT: orl %edx, %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shll $16, %eax
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl 16(%ebp), %eax
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: andb $96, %bl
+; X86-NEXT: shrb $3, %bl
+; X86-NEXT: movzbl %bl, %edi
+; X86-NEXT: movl 32(%esp,%edi), %edi
+; X86-NEXT: btcl %eax, %edi
+; X86-NEXT: andl $96, %eax
+; X86-NEXT: shrl $3, %eax
; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %edi, 12(%ecx)
-; X86-NEXT: movl %ebx, 8(%ecx)
-; X86-NEXT: movl %eax, 4(%ecx)
-; X86-NEXT: movl %edx, (%ecx)
+; X86-NEXT: movl %edi, (%ecx,%eax)
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: movw %dx, 14(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: movw %dx, 12(%eax)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: movw %dx, 10(%eax)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: movw %dx, 8(%eax)
-; X86-NEXT: movl (%esp), %edx # 4-byte Reload
-; X86-NEXT: movw %dx, 6(%eax)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: movw %dx, 4(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movw %cx, 10(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movw %cx, 8(%eax)
+; X86-NEXT: movw %si, 6(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movw %cx, 4(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movw %cx, 2(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
@@ -1056,81 +1037,57 @@ define <8 x i16> @complement_ne_i128_bitcast(ptr %word, i32 %position) nounwind
;
; SSE2-LABEL: complement_ne_i128_bitcast:
; SSE2: # %bb.0:
-; SSE2-NEXT: movl %esi, %ecx
-; SSE2-NEXT: movl $1, %eax
-; SSE2-NEXT: xorl %edx, %edx
-; SSE2-NEXT: shldq %cl, %rax, %rdx
-; SSE2-NEXT: xorl %esi, %esi
-; SSE2-NEXT: shlq %cl, %rax
-; SSE2-NEXT: testb $64, %cl
-; SSE2-NEXT: cmovneq %rax, %rdx
-; SSE2-NEXT: cmovneq %rsi, %rax
+; SSE2-NEXT: # kill: def $esi killed $esi def $rsi
; SSE2-NEXT: movdqa (%rdi), %xmm0
-; SSE2-NEXT: xorq %rdx, 8(%rdi)
-; SSE2-NEXT: movq %xmm0, %rcx
-; SSE2-NEXT: xorq %rax, %rcx
-; SSE2-NEXT: movq %rcx, (%rdi)
+; SSE2-NEXT: movq 8(%rdi), %rax
+; SSE2-NEXT: movq %xmm0, %rdx
+; SSE2-NEXT: movl %esi, %ecx
+; SSE2-NEXT: andb $32, %cl
+; SSE2-NEXT: shrdq %cl, %rax, %rdx
+; SSE2-NEXT: shrq %cl, %rax
+; SSE2-NEXT: testb $64, %sil
+; SSE2-NEXT: cmoveq %rdx, %rax
+; SSE2-NEXT: btcl %esi, %eax
+; SSE2-NEXT: andl $96, %esi
+; SSE2-NEXT: shrl $3, %esi
+; SSE2-NEXT: movl %eax, (%rdi,%rsi)
; SSE2-NEXT: retq
;
; SSE4-LABEL: complement_ne_i128_bitcast:
; SSE4: # %bb.0:
-; SSE4-NEXT: movl %esi, %ecx
-; SSE4-NEXT: movl $1, %eax
-; SSE4-NEXT: xorl %edx, %edx
-; SSE4-NEXT: shldq %cl, %rax, %rdx
-; SSE4-NEXT: shlq %cl, %rax
-; SSE4-NEXT: xorl %esi, %esi
-; SSE4-NEXT: testb $64, %cl
-; SSE4-NEXT: cmovneq %rax, %rdx
-; SSE4-NEXT: cmovneq %rsi, %rax
+; SSE4-NEXT: # kill: def $esi killed $esi def $rsi
; SSE4-NEXT: movdqa (%rdi), %xmm0
-; SSE4-NEXT: movq %xmm0, %rcx
-; SSE4-NEXT: xorq %rax, %rcx
; SSE4-NEXT: pextrq $1, %xmm0, %rax
-; SSE4-NEXT: xorq %rdx, %rax
-; SSE4-NEXT: movq %rax, 8(%rdi)
-; SSE4-NEXT: movq %rcx, (%rdi)
+; SSE4-NEXT: movq %xmm0, %rdx
+; SSE4-NEXT: movl %esi, %ecx
+; SSE4-NEXT: andb $32, %cl
+; SSE4-NEXT: shrdq %cl, %rax, %rdx
+; SSE4-NEXT: shrq %cl, %rax
+; SSE4-NEXT: testb $64, %sil
+; SSE4-NEXT: cmoveq %rdx, %rax
+; SSE4-NEXT: btcl %esi, %eax
+; SSE4-NEXT: andl $96, %esi
+; SSE4-NEXT: shrl $3, %esi
+; SSE4-NEXT: movl %eax, (%rdi,%rsi)
; SSE4-NEXT: retq
;
-; AVX2-LABEL: complement_ne_i128_bitcast:
-; AVX2: # %bb.0:
-; AVX2-NEXT: movl %esi, %ecx
-; AVX2-NEXT: movl $1, %eax
-; AVX2-NEXT: xorl %edx, %edx
-; AVX2-NEXT: shldq %cl, %rax, %rdx
-; AVX2-NEXT: xorl %esi, %esi
-; AVX2-NEXT: shlxq %rcx, %rax, %rax
-; AVX2-NEXT: testb $64, %cl
-; AVX2-NEXT: cmovneq %rax, %rdx
-; AVX2-NEXT: cmovneq %rsi, %rax
-; AVX2-NEXT: vmovdqa (%rdi), %xmm0
-; AVX2-NEXT: vmovq %xmm0, %rcx
-; AVX2-NEXT: vpextrq $1, %xmm0, %rsi
-; AVX2-NEXT: xorq %rax, %rcx
-; AVX2-NEXT: xorq %rdx, %rsi
-; AVX2-NEXT: movq %rsi, 8(%rdi)
-; AVX2-NEXT: movq %rcx, (%rdi)
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: complement_ne_i128_bitcast:
-; AVX512: # %bb.0:
-; AVX512-NEXT: movl %esi, %ecx
-; AVX512-NEXT: xorl %eax, %eax
-; AVX512-NEXT: movl $1, %edx
-; AVX512-NEXT: xorl %esi, %esi
-; AVX512-NEXT: shldq %cl, %rdx, %rsi
-; AVX512-NEXT: shlxq %rcx, %rdx, %rdx
-; AVX512-NEXT: testb $64, %cl
-; AVX512-NEXT: cmovneq %rdx, %rsi
-; AVX512-NEXT: cmovneq %rax, %rdx
-; AVX512-NEXT: vmovdqa (%rdi), %xmm0
-; AVX512-NEXT: vmovq %xmm0, %rax
-; AVX512-NEXT: xorq %rdx, %rax
-; AVX512-NEXT: vpextrq $1, %xmm0, %rcx
-; AVX512-NEXT: xorq %rsi, %rcx
-; AVX512-NEXT: movq %rcx, 8(%rdi)
-; AVX512-NEXT: movq %rax, (%rdi)
-; AVX512-NEXT: retq
+; AVX-LABEL: complement_ne_i128_bitcast:
+; AVX: # %bb.0:
+; AVX-NEXT: # kill: def $esi killed $esi def $rsi
+; AVX-NEXT: vmovdqa (%rdi), %xmm0
+; AVX-NEXT: vpextrq $1, %xmm0, %rax
+; AVX-NEXT: vmovq %xmm0, %rdx
+; AVX-NEXT: movl %esi, %ecx
+; AVX-NEXT: andb $32, %cl
+; AVX-NEXT: shrdq %cl, %rax, %rdx
+; AVX-NEXT: shrxq %rcx, %rax, %rax
+; AVX-NEXT: testb $64, %sil
+; AVX-NEXT: cmoveq %rdx, %rax
+; AVX-NEXT: btcl %esi, %eax
+; AVX-NEXT: andl $96, %esi
+; AVX-NEXT: shrl $3, %esi
+; AVX-NEXT: movl %eax, (%rdi,%rsi)
+; AVX-NEXT: retq
%rem = and i32 %position, 127
%ofs = zext nneg i32 %rem to i128
%bit = shl nuw i128 1, %ofs