[llvm] [X86] narrowBitOpRMW - add handling for single bit insertion patterns (REAPPLIED) (PR #166337)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 4 00:56:49 PST 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-x86
Author: Simon Pilgrim (RKSimon)
Changes:
Insertion of a single bit into a large integer is typically canonicalized to "(X & ~(1 << ShAmt)) | (InsertBit << ShAmt)", which can be simplified to modify only the affected i32 block: a BTR followed by an OR with ((i32)InsertBit << (ShAmt % 32)).
We must ensure that InsertBit is zero in every bit apart from the LSB, so that it can be cheaply truncated to operate on the i32 block, just like the simpler BT patterns.
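As a rough scalar illustration of the equivalence the fold relies on (a minimal standalone C++ sketch, not the DAG combine itself; the helper name and test values are made up for this example): only the i32 word containing ShAmt is read, modified, and stored back.

```cpp
#include <cassert>
#include <cstdint>

// Scalar model of the BitInsert fold: inserting one bit into a wide integer
// stored as little-endian i32 words only touches the word containing ShAmt,
// so the whole-value "(X & ~(1 << ShAmt)) | (InsertBit << ShAmt)" collapses
// to a BTR + OR on that single word.
void insertBitWide(uint32_t *Words, unsigned NumWords, unsigned ShAmt,
                   uint32_t InsertBit /* must be 0 or 1 */) {
  unsigned Word = ShAmt / 32;    // index of the affected i32 block
  unsigned ModAmt = ShAmt % 32;  // bit position within that block
  assert(Word < NumWords && InsertBit <= 1);
  uint32_t X = Words[Word];
  X &= ~(uint32_t(1) << ModAmt); // BTR: clear the target bit
  X |= InsertBit << ModAmt;      // OR in the truncated insert bit
  Words[Word] = X;               // only this one word is stored back
}

int main() {
  uint32_t Words[4] = {0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu};
  insertBitWide(Words, 4, 70, 0); // clear bit 70 of a 128-bit value
  assert(Words[2] == 0xFFFFFFBFu); // bit 70 lives in word 2, position 6
  assert(Words[0] == 0xFFFFFFFFu && Words[1] == 0xFFFFFFFFu &&
         Words[3] == 0xFFFFFFFFu); // the other words are untouched
  return 0;
}
```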
REAPPLIED from #165742, which was reverted as part of a chain of commits due to a sanitizer regression that should have been fixed by #166160.
---
Patch is 43.41 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/166337.diff
2 Files Affected:
- (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+34-9)
- (modified) llvm/test/CodeGen/X86/bittest-big-integer.ll (+80-872)
``````````diff
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b97b5089cb0a3..d4a4d4339f7e1 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -53349,7 +53349,8 @@ static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,
}
// Look for a RMW operation that only touches one bit of a larger than legal
-// type and fold it to a BTC/BTR/BTS pattern acting on a single i32 sub value.
+// type and fold it to a BTC/BTR/BTS or bit insertion pattern acting on a single
+// i32 sub value.
static SDValue narrowBitOpRMW(StoreSDNode *St, const SDLoc &DL,
SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
@@ -53375,14 +53376,20 @@ static SDValue narrowBitOpRMW(StoreSDNode *St, const SDLoc &DL,
// BTR: X & ~(1 << ShAmt)
// BTS: X | (1 << ShAmt)
// BTC: X ^ (1 << ShAmt)
- SDValue ShAmt;
+ //
+ // BitInsert: (X & ~(1 << ShAmt)) | (InsertBit << ShAmt)
+ SDValue InsertBit, ShAmt;
if (!StoredVal.hasOneUse() ||
!(sd_match(StoredVal, m_And(m_Specific(LoadVal),
m_Not(m_Shl(m_One(), m_Value(ShAmt))))) ||
sd_match(StoredVal,
m_Or(m_Specific(LoadVal), m_Shl(m_One(), m_Value(ShAmt)))) ||
sd_match(StoredVal,
- m_Xor(m_Specific(LoadVal), m_Shl(m_One(), m_Value(ShAmt))))))
+ m_Xor(m_Specific(LoadVal), m_Shl(m_One(), m_Value(ShAmt)))) ||
+ sd_match(StoredVal,
+ m_Or(m_And(m_Specific(LoadVal),
+ m_Not(m_Shl(m_One(), m_Value(ShAmt)))),
+ m_Shl(m_Value(InsertBit), m_Deferred(ShAmt))))))
return SDValue();
// Ensure the shift amount is in bounds.
@@ -53390,6 +53397,13 @@ static SDValue narrowBitOpRMW(StoreSDNode *St, const SDLoc &DL,
if (KnownAmt.getMaxValue().uge(VT.getSizeInBits()))
return SDValue();
+ // If we're inserting a bit then it must be the LSB.
+ if (InsertBit) {
+ KnownBits KnownInsert = DAG.computeKnownBits(InsertBit);
+ if (KnownInsert.countMinLeadingZeros() < (VT.getSizeInBits() - 1))
+ return SDValue();
+ }
+
// Split the shift into an alignment shift that moves the active i32 block to
// the bottom bits for truncation and a modulo shift that can act on the i32.
EVT AmtVT = ShAmt.getValueType();
@@ -53397,6 +53411,7 @@ static SDValue narrowBitOpRMW(StoreSDNode *St, const SDLoc &DL,
DAG.getSignedConstant(-32LL, DL, AmtVT));
SDValue ModuloAmt =
DAG.getNode(ISD::AND, DL, AmtVT, ShAmt, DAG.getConstant(31, DL, AmtVT));
+ ModuloAmt = DAG.getZExtOrTrunc(ModuloAmt, DL, MVT::i8);
// Compute the byte offset for the i32 block that is changed by the RMW.
// combineTruncate will adjust the load for us in a similar way.
@@ -53411,13 +53426,23 @@ static SDValue narrowBitOpRMW(StoreSDNode *St, const SDLoc &DL,
SDValue X = DAG.getNode(ISD::SRL, DL, VT, LoadVal, AlignAmt);
X = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, X);
- SDValue Mask =
- DAG.getNode(ISD::SHL, DL, MVT::i32, DAG.getConstant(1, DL, MVT::i32),
- DAG.getZExtOrTrunc(ModuloAmt, DL, MVT::i8));
- if (StoredVal.getOpcode() == ISD::AND)
- Mask = DAG.getNOT(DL, Mask, MVT::i32);
+ SDValue Mask = DAG.getNode(ISD::SHL, DL, MVT::i32,
+ DAG.getConstant(1, DL, MVT::i32), ModuloAmt);
+
+ SDValue Res;
+ if (InsertBit) {
+ SDValue BitMask =
+ DAG.getNode(ISD::SHL, DL, MVT::i32,
+ DAG.getZExtOrTrunc(InsertBit, DL, MVT::i32), ModuloAmt);
+ Res =
+ DAG.getNode(ISD::AND, DL, MVT::i32, X, DAG.getNOT(DL, Mask, MVT::i32));
+ Res = DAG.getNode(ISD::OR, DL, MVT::i32, Res, BitMask);
+ } else {
+ if (StoredVal.getOpcode() == ISD::AND)
+ Mask = DAG.getNOT(DL, Mask, MVT::i32);
+ Res = DAG.getNode(StoredVal.getOpcode(), DL, MVT::i32, X, Mask);
+ }
- SDValue Res = DAG.getNode(StoredVal.getOpcode(), DL, MVT::i32, X, Mask);
return DAG.getStore(St->getChain(), DL, Res, NewPtr, St->getPointerInfo(),
Align(), St->getMemOperand()->getFlags());
}
diff --git a/llvm/test/CodeGen/X86/bittest-big-integer.ll b/llvm/test/CodeGen/X86/bittest-big-integer.ll
index c311ab869c311..87a54a0b9148d 100644
--- a/llvm/test/CodeGen/X86/bittest-big-integer.ll
+++ b/llvm/test/CodeGen/X86/bittest-big-integer.ll
@@ -356,41 +356,20 @@ define i1 @init_eq_i64(ptr %word, i32 %position, i1 zeroext %value) nounwind {
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl $1, %edx
-; X86-NEXT: xorl %esi, %esi
-; X86-NEXT: shldl %cl, %edx, %esi
-; X86-NEXT: shll %cl, %edx
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorl %edi, %edi
-; X86-NEXT: shldl %cl, %eax, %edi
-; X86-NEXT: shll %cl, %eax
-; X86-NEXT: testb $32, %cl
-; X86-NEXT: je .LBB9_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movl %edx, %esi
-; X86-NEXT: movl $0, %edx
-; X86-NEXT: .LBB9_2:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: notl %esi
-; X86-NEXT: notl %edx
-; X86-NEXT: je .LBB9_4
-; X86-NEXT: # %bb.3:
-; X86-NEXT: movl %eax, %edi
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: .LBB9_4:
-; X86-NEXT: andl 4(%ebx), %esi
-; X86-NEXT: orl %edi, %esi
-; X86-NEXT: andl (%ebx), %edx
-; X86-NEXT: orl %eax, %edx
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: andl $32, %eax
-; X86-NEXT: shrl $3, %eax
-; X86-NEXT: movl (%ebx,%eax), %eax
-; X86-NEXT: btl %ecx, %eax
+; X86-NEXT: movl %ecx, %esi
+; X86-NEXT: andl $32, %esi
+; X86-NEXT: shrl $3, %esi
+; X86-NEXT: movl (%edx,%esi), %edi
+; X86-NEXT: btl %ecx, %edi
; X86-NEXT: setae %al
-; X86-NEXT: movl %edx, (%ebx)
-; X86-NEXT: movl %esi, 4(%ebx)
+; X86-NEXT: btrl %ecx, %edi
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-NEXT: shll %cl, %ebx
+; X86-NEXT: orl %edi, %ebx
+; X86-NEXT: movl %ebx, (%edx,%esi)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -600,208 +579,55 @@ define i1 @set_ne_i128(ptr %word, i32 %position) nounwind {
define i1 @init_eq_i128(ptr %word, i32 %position, i1 zeroext %value) nounwind {
; X86-LABEL: init_eq_i128:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebp
-; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: andl $-16, %esp
-; X86-NEXT: subl $96, %esp
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movzbl 16(%ebp), %ebx
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $1, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: shrb $3, %al
-; X86-NEXT: andb $12, %al
-; X86-NEXT: negb %al
-; X86-NEXT: movsbl %al, %eax
-; X86-NEXT: movl 64(%esp,%eax), %edx
-; X86-NEXT: movl 68(%esp,%eax), %esi
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %eax, %esi
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: movzbl %bl, %eax
-; X86-NEXT: movl 72(%esp,%esi), %ebx
-; X86-NEXT: movl 76(%esp,%esi), %esi
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %ebx, %edi
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: shldl %cl, %eax, %edi
-; X86-NEXT: shldl %cl, %ebx, %esi
-; X86-NEXT: movl %edx, %ebx
-; X86-NEXT: shll %cl, %ebx
-; X86-NEXT: # kill: def $cl killed $cl killed $ecx
-; X86-NEXT: shldl %cl, %edx, %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: notl %edi
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl (%esp), %ecx # 4-byte Reload
-; X86-NEXT: movl 36(%esp,%ecx), %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 40(%esp,%ecx), %edx
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: # kill: def $cl killed $cl killed $ecx
-; X86-NEXT: shldl %cl, %eax, %edx
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: andl 8(%eax), %edi
-; X86-NEXT: orl %edx, %edi
-; X86-NEXT: notl %esi
-; X86-NEXT: movl (%esp), %eax # 4-byte Reload
-; X86-NEXT: movl 44(%esp,%eax), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: # kill: def $cl killed $cl killed $ecx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: shldl %cl, %edx, %eax
-; X86-NEXT: movl 8(%ebp), %ecx
-; X86-NEXT: andl 12(%ecx), %esi
-; X86-NEXT: orl %eax, %esi
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: notl %ebx
-; X86-NEXT: movl (%esp), %eax # 4-byte Reload
-; X86-NEXT: movl 32(%esp,%eax), %edx
-; X86-NEXT: movl %edx, (%esp) # 4-byte Spill
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: shll %cl, %edx
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: andl (%eax), %ebx
-; X86-NEXT: orl %edx, %ebx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: notl %edx
-; X86-NEXT: # kill: def $cl killed $cl killed $ecx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl (%esp), %esi # 4-byte Reload
-; X86-NEXT: shldl %cl, %esi, %eax
-; X86-NEXT: movl 8(%ebp), %ecx
-; X86-NEXT: andl 4(%ecx), %edx
-; X86-NEXT: orl %eax, %edx
-; X86-NEXT: movl 12(%ebp), %esi
-; X86-NEXT: movl %esi, %eax
-; X86-NEXT: andl $96, %eax
-; X86-NEXT: shrl $3, %eax
-; X86-NEXT: movl (%ecx,%eax), %eax
-; X86-NEXT: btl %esi, %eax
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: movl %edi, 8(%ecx)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: movl %ecx, 12(%eax)
-; X86-NEXT: movl %ebx, (%eax)
-; X86-NEXT: movl %edx, 4(%eax)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, %esi
+; X86-NEXT: andl $96, %esi
+; X86-NEXT: shrl $3, %esi
+; X86-NEXT: movl (%edx,%esi), %edi
+; X86-NEXT: btl %ecx, %edi
; X86-NEXT: setae %al
-; X86-NEXT: leal -12(%ebp), %esp
+; X86-NEXT: btrl %ecx, %edi
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-NEXT: shll %cl, %ebx
+; X86-NEXT: orl %edi, %ebx
+; X86-NEXT: movl %ebx, (%edx,%esi)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
-; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; SSE-LABEL: init_eq_i128:
; SSE: # %bb.0:
; SSE-NEXT: movl %esi, %ecx
-; SSE-NEXT: movl $1, %esi
-; SSE-NEXT: xorl %r8d, %r8d
-; SSE-NEXT: shldq %cl, %rsi, %r8
-; SSE-NEXT: shlq %cl, %rsi
-; SSE-NEXT: movl %edx, %eax
-; SSE-NEXT: xorl %edx, %edx
-; SSE-NEXT: shldq %cl, %rax, %rdx
-; SSE-NEXT: shlq %cl, %rax
-; SSE-NEXT: xorl %r9d, %r9d
-; SSE-NEXT: testb $64, %cl
-; SSE-NEXT: cmovneq %rsi, %r8
-; SSE-NEXT: cmovneq %r9, %rsi
-; SSE-NEXT: notq %r8
-; SSE-NEXT: cmovneq %rax, %rdx
-; SSE-NEXT: cmovneq %r9, %rax
-; SSE-NEXT: notq %rsi
-; SSE-NEXT: andq 8(%rdi), %r8
-; SSE-NEXT: orq %rdx, %r8
-; SSE-NEXT: andq (%rdi), %rsi
-; SSE-NEXT: orq %rax, %rsi
-; SSE-NEXT: movl %ecx, %eax
-; SSE-NEXT: andl $96, %eax
-; SSE-NEXT: shrl $3, %eax
-; SSE-NEXT: movl (%rdi,%rax), %eax
-; SSE-NEXT: btl %ecx, %eax
+; SSE-NEXT: andl $96, %esi
+; SSE-NEXT: shrl $3, %esi
+; SSE-NEXT: movl (%rdi,%rsi), %r8d
+; SSE-NEXT: btl %ecx, %r8d
; SSE-NEXT: setae %al
-; SSE-NEXT: movq %rsi, (%rdi)
-; SSE-NEXT: movq %r8, 8(%rdi)
+; SSE-NEXT: shll %cl, %edx
+; SSE-NEXT: btrl %ecx, %r8d
+; SSE-NEXT: orl %r8d, %edx
+; SSE-NEXT: movl %edx, (%rdi,%rsi)
; SSE-NEXT: retq
;
-; AVX2-LABEL: init_eq_i128:
-; AVX2: # %bb.0:
-; AVX2-NEXT: movl %esi, %ecx
-; AVX2-NEXT: movl $1, %eax
-; AVX2-NEXT: xorl %esi, %esi
-; AVX2-NEXT: shldq %cl, %rax, %rsi
-; AVX2-NEXT: movl %edx, %edx
-; AVX2-NEXT: xorl %r8d, %r8d
-; AVX2-NEXT: shldq %cl, %rdx, %r8
-; AVX2-NEXT: xorl %r9d, %r9d
-; AVX2-NEXT: shlxq %rcx, %rax, %rax
-; AVX2-NEXT: testb $64, %cl
-; AVX2-NEXT: cmovneq %rax, %rsi
-; AVX2-NEXT: cmovneq %r9, %rax
-; AVX2-NEXT: shlxq %rcx, %rdx, %rdx
-; AVX2-NEXT: cmovneq %rdx, %r8
-; AVX2-NEXT: cmovneq %r9, %rdx
-; AVX2-NEXT: andnq 8(%rdi), %rsi, %rsi
-; AVX2-NEXT: orq %r8, %rsi
-; AVX2-NEXT: andnq (%rdi), %rax, %r8
-; AVX2-NEXT: orq %rdx, %r8
-; AVX2-NEXT: movl %ecx, %eax
-; AVX2-NEXT: andl $96, %eax
-; AVX2-NEXT: shrl $3, %eax
-; AVX2-NEXT: movl (%rdi,%rax), %eax
-; AVX2-NEXT: btl %ecx, %eax
-; AVX2-NEXT: setae %al
-; AVX2-NEXT: movq %r8, (%rdi)
-; AVX2-NEXT: movq %rsi, 8(%rdi)
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: init_eq_i128:
-; AVX512: # %bb.0:
-; AVX512-NEXT: movl %esi, %ecx
-; AVX512-NEXT: movl $1, %eax
-; AVX512-NEXT: xorl %esi, %esi
-; AVX512-NEXT: shldq %cl, %rax, %rsi
-; AVX512-NEXT: xorl %r8d, %r8d
-; AVX512-NEXT: shlxq %rcx, %rax, %rax
-; AVX512-NEXT: movl %edx, %edx
-; AVX512-NEXT: xorl %r9d, %r9d
-; AVX512-NEXT: shldq %cl, %rdx, %r9
-; AVX512-NEXT: testb $64, %cl
-; AVX512-NEXT: cmovneq %rax, %rsi
-; AVX512-NEXT: cmovneq %r8, %rax
-; AVX512-NEXT: shlxq %rcx, %rdx, %rdx
-; AVX512-NEXT: cmovneq %rdx, %r9
-; AVX512-NEXT: cmovneq %r8, %rdx
-; AVX512-NEXT: andnq 8(%rdi), %rsi, %rsi
-; AVX512-NEXT: orq %r9, %rsi
-; AVX512-NEXT: andnq (%rdi), %rax, %r8
-; AVX512-NEXT: orq %rdx, %r8
-; AVX512-NEXT: movl %ecx, %eax
-; AVX512-NEXT: andl $96, %eax
-; AVX512-NEXT: shrl $3, %eax
-; AVX512-NEXT: movl (%rdi,%rax), %eax
-; AVX512-NEXT: btl %ecx, %eax
-; AVX512-NEXT: setae %al
-; AVX512-NEXT: movq %r8, (%rdi)
-; AVX512-NEXT: movq %rsi, 8(%rdi)
-; AVX512-NEXT: retq
+; AVX-LABEL: init_eq_i128:
+; AVX: # %bb.0:
+; AVX-NEXT: movl %esi, %ecx
+; AVX-NEXT: andl $96, %ecx
+; AVX-NEXT: shrl $3, %ecx
+; AVX-NEXT: movl (%rdi,%rcx), %r8d
+; AVX-NEXT: btl %esi, %r8d
+; AVX-NEXT: setae %al
+; AVX-NEXT: btrl %esi, %r8d
+; AVX-NEXT: shlxl %esi, %edx, %edx
+; AVX-NEXT: orl %r8d, %edx
+; AVX-NEXT: movl %edx, (%rdi,%rcx)
+; AVX-NEXT: retq
%rem = and i32 %position, 127
%ofs = zext nneg i32 %rem to i128
%bit = shl nuw i128 1, %ofs
@@ -977,673 +803,55 @@ define i1 @set_ne_i512(ptr %word, i32 %position) nounwind {
define i1 @init_eq_i512(ptr %word, i32 %position, i1 zeroext %value) nounwind {
; X86-LABEL: init_eq_i512:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebp
-; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: andl $-16, %esp
-; X86-NEXT: subl $352, %esp # imm = 0x160
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, %edx
-; X86-NEXT: shrl $3, %edx
-; X86-NEXT: andl $60, %edx
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: subl %edx, %eax
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $1, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 56(%eax), %esi
-; X86-NEXT: movl 60(%eax), %ebx
-; X86-NEXT: movl 52(%eax), %edi
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 48(%eax), %edi
-; X86-NEXT: movl 44(%eax), %edx
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 40(%eax), %edx
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 36(%eax), %edx
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 32(%eax), %edx
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 28(%eax), %edx
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 24(%eax), %edx
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 20(%eax), %edx
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 16(%eax), %edx
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 12(%eax), %edx
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 8(%eax), %edx
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl (%eax), %edx
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 4(%eax), %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movzbl 16(%ebp), %eax
-; X86-NEXT: movzbl %al, %eax
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: andl $31, %ecx
-; X86-NEXT: shldl %cl, %esi, %ebx
-; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: shldl %cl, %eax, %esi
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: shldl %cl, %edi, %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-NEXT: shldl %cl, %ebx, %edi
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: shldl %cl, %edx, %ebx
-; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: shldl %cl, %eax, %edx
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: shldl %cl, %edx, %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: shldl %cl, %eax, %edx
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: shldl %cl, %edx, %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: shldl %cl, %eax, %edx
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: shldl %cl, %edx, %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[-0-9...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/166337