[llvm] [X86] bittest-big-integer.ll - add BLSR style pattern test (PR #168356)

via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 17 04:05:22 PST 2025


llvmbot wrote:


@llvm/pr-subscribers-backend-x86

Author: Simon Pilgrim (RKSimon)


Test using CTTZ to determine the lowest set bit, clear it, and return the index.

Shows a failure to use the RMW pattern on the load-btr-store sequence due to additional (but non-interfering) uses of the load.
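
The IR body of the new test is truncated out of the quoted diff below (only the generated CHECK lines and the `define i32 @blsr_u512(ptr %word)` signature are visible), so as a rough orientation only, here is a minimal sketch of the kind of pattern such a test exercises. The body is hypothetical and glosses over the all-zero input case, which the generated code handles with a separate branch (the `movl $512, %ecx` path):

```llvm
define i32 @blsr_u512(ptr %word) nounwind {
  %val  = load i512, ptr %word
  ; Index of the lowest set bit (defined as 512 for a zero input).
  %tz   = call i512 @llvm.cttz.i512(i512 %val, i1 false)
  ; Build a single-bit mask at that index and invert it.
  %bit  = shl i512 1, %tz
  %mask = xor i512 %bit, -1
  ; BLSR-style: reset the lowest set bit.
  %res  = and i512 %val, %mask
  ; Write back to memory; this load-clear-store is the candidate RMW sequence.
  store i512 %res, ptr %word
  %idx  = trunc i512 %tz to i32
  ret i32 %idx
}

declare i512 @llvm.cttz.i512(i512, i1)
```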

---

Patch is 24.85 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/168356.diff


1 File Affected:

- (modified) llvm/test/CodeGen/X86/bittest-big-integer.ll (+615) 


``````````diff
diff --git a/llvm/test/CodeGen/X86/bittest-big-integer.ll b/llvm/test/CodeGen/X86/bittest-big-integer.ll
index 9b7569ff8b29f..b85a20b9d6b6e 100644
--- a/llvm/test/CodeGen/X86/bittest-big-integer.ll
+++ b/llvm/test/CodeGen/X86/bittest-big-integer.ll
@@ -1488,3 +1488,618 @@ define i1 @sequence_i128(ptr %word, i32 %pos0, i32 %pos1, i32 %pos2) nounwind {
   store i128 %res2, ptr %word
   ret i1 %cmp1
 }
+
+define i32 @blsr_u512(ptr %word) nounwind {
+; X86-LABEL: blsr_u512:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    andl $-16, %esp
+; X86-NEXT:    subl $240, %esp
+; X86-NEXT:    movl 8(%ebp), %ebx
+; X86-NEXT:    movl 12(%ebx), %esi
+; X86-NEXT:    movl 28(%ebx), %eax
+; X86-NEXT:    movl 60(%ebx), %ecx
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    movl 44(%ebx), %edx
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %ecx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %edx, %ecx
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    movl 20(%ebx), %edx
+; X86-NEXT:    movl 52(%ebx), %eax
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %eax, %edx
+; X86-NEXT:    movl 4(%ebx), %edi
+; X86-NEXT:    movl 36(%ebx), %esi
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %esi, %eax
+; X86-NEXT:    orl %edx, %eax
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    movl 24(%ebx), %edx
+; X86-NEXT:    movl 56(%ebx), %ecx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %ecx, %edx
+; X86-NEXT:    movl 8(%ebx), %ecx
+; X86-NEXT:    movl 40(%ebx), %esi
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %esi, %ecx
+; X86-NEXT:    orl %edx, %ecx
+; X86-NEXT:    movl 16(%ebx), %edx
+; X86-NEXT:    movl 48(%ebx), %esi
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %esi, %edx
+; X86-NEXT:    movl (%ebx), %esi
+; X86-NEXT:    movl 32(%ebx), %ebx
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %ebx, %esi
+; X86-NEXT:    orl %edx, %esi
+; X86-NEXT:    orl %ecx, %esi
+; X86-NEXT:    orl %eax, %esi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    je .LBB26_1
+; X86-NEXT:  # %bb.2: # %cond.false
+; X86-NEXT:    testl %ebx, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    jne .LBB26_3
+; X86-NEXT:  # %bb.4: # %cond.false
+; X86-NEXT:    rep bsfl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    addl $32, %eax
+; X86-NEXT:    jmp .LBB26_5
+; X86-NEXT:  .LBB26_1:
+; X86-NEXT:    movl $512, %ecx # imm = 0x200
+; X86-NEXT:    jmp .LBB26_41
+; X86-NEXT:  .LBB26_3:
+; X86-NEXT:    rep bsfl %ebx, %eax
+; X86-NEXT:  .LBB26_5: # %cond.false
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    testl %ecx, %ecx
+; X86-NEXT:    jne .LBB26_6
+; X86-NEXT:  # %bb.7: # %cond.false
+; X86-NEXT:    rep bsfl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    addl $32, %ecx
+; X86-NEXT:    jmp .LBB26_8
+; X86-NEXT:  .LBB26_6:
+; X86-NEXT:    rep bsfl %ecx, %ecx
+; X86-NEXT:  .LBB26_8: # %cond.false
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    jne .LBB26_10
+; X86-NEXT:  # %bb.9: # %cond.false
+; X86-NEXT:    addl $64, %ecx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:  .LBB26_10: # %cond.false
+; X86-NEXT:    testl %esi, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jne .LBB26_11
+; X86-NEXT:  # %bb.12: # %cond.false
+; X86-NEXT:    rep bsfl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    addl $32, %ecx
+; X86-NEXT:    testl %edx, %edx
+; X86-NEXT:    je .LBB26_15
+; X86-NEXT:  .LBB26_14:
+; X86-NEXT:    rep bsfl %edx, %edx
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    je .LBB26_17
+; X86-NEXT:    jmp .LBB26_18
+; X86-NEXT:  .LBB26_11:
+; X86-NEXT:    rep bsfl %esi, %ecx
+; X86-NEXT:    testl %edx, %edx
+; X86-NEXT:    jne .LBB26_14
+; X86-NEXT:  .LBB26_15: # %cond.false
+; X86-NEXT:    rep bsfl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    addl $32, %edx
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    jne .LBB26_18
+; X86-NEXT:  .LBB26_17: # %cond.false
+; X86-NEXT:    addl $64, %edx
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:  .LBB26_18: # %cond.false
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    orl %edx, %esi
+; X86-NEXT:    jne .LBB26_20
+; X86-NEXT:  # %bb.19: # %cond.false
+; X86-NEXT:    subl $-128, %ecx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:  .LBB26_20: # %cond.false
+; X86-NEXT:    addl $256, %eax # imm = 0x100
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    testl %edx, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    jne .LBB26_21
+; X86-NEXT:  # %bb.22: # %cond.false
+; X86-NEXT:    rep bsfl %edi, %ebx
+; X86-NEXT:    addl $32, %ebx
+; X86-NEXT:    jmp .LBB26_23
+; X86-NEXT:  .LBB26_21:
+; X86-NEXT:    rep bsfl %edx, %ebx
+; X86-NEXT:  .LBB26_23: # %cond.false
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    testl %ecx, %ecx
+; X86-NEXT:    jne .LBB26_24
+; X86-NEXT:  # %bb.25: # %cond.false
+; X86-NEXT:    rep bsfl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    addl $32, %ecx
+; X86-NEXT:    orl %edi, %edx
+; X86-NEXT:    je .LBB26_27
+; X86-NEXT:    jmp .LBB26_28
+; X86-NEXT:  .LBB26_24:
+; X86-NEXT:    rep bsfl %ecx, %ecx
+; X86-NEXT:    orl %edi, %edx
+; X86-NEXT:    jne .LBB26_28
+; X86-NEXT:  .LBB26_27: # %cond.false
+; X86-NEXT:    addl $64, %ecx
+; X86-NEXT:    movl %ecx, %ebx
+; X86-NEXT:  .LBB26_28: # %cond.false
+; X86-NEXT:    testl %esi, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    jne .LBB26_29
+; X86-NEXT:  # %bb.30: # %cond.false
+; X86-NEXT:    rep bsfl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    addl $32, %ecx
+; X86-NEXT:    testl %edx, %edx
+; X86-NEXT:    je .LBB26_33
+; X86-NEXT:  .LBB26_32:
+; X86-NEXT:    rep bsfl %edx, %edx
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    je .LBB26_35
+; X86-NEXT:    jmp .LBB26_36
+; X86-NEXT:  .LBB26_29:
+; X86-NEXT:    rep bsfl %esi, %ecx
+; X86-NEXT:    testl %edx, %edx
+; X86-NEXT:    jne .LBB26_32
+; X86-NEXT:  .LBB26_33: # %cond.false
+; X86-NEXT:    rep bsfl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    addl $32, %edx
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    jne .LBB26_36
+; X86-NEXT:  .LBB26_35: # %cond.false
+; X86-NEXT:    addl $64, %edx
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:  .LBB26_36: # %cond.false
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, %esi
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    orl %edx, %esi
+; X86-NEXT:    jne .LBB26_38
+; X86-NEXT:  # %bb.37: # %cond.false
+; X86-NEXT:    subl $-128, %ecx
+; X86-NEXT:    movl %ecx, %ebx
+; X86-NEXT:  .LBB26_38: # %cond.false
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    orl %ecx, %edx
+; X86-NEXT:    movl %edi, %ecx
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    orl %ecx, %esi
+; X86-NEXT:    orl %edx, %esi
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    jne .LBB26_40
+; X86-NEXT:  # %bb.39: # %cond.false
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:  .LBB26_40: # %cond.false
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:  .LBB26_41: # %cond.end
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %ecx, %esi
+; X86-NEXT:    shrl $3, %esi
+; X86-NEXT:    andl $60, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    leal {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    subl %esi, %edx
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $1, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    andl $31, %ecx
+; X86-NEXT:    movl 56(%edx), %edi
+; X86-NEXT:    movl 60(%edx), %esi
+; X86-NEXT:    shldl %cl, %edi, %esi
+; X86-NEXT:    notl %esi
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 52(%edx), %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    shldl %cl, %esi, %edi
+; X86-NEXT:    notl %edi
+; X86-NEXT:    andl %eax, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 40(%edx), %eax
+; X86-NEXT:    movl 44(%edx), %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    shldl %cl, %eax, %esi
+; X86-NEXT:    notl %esi
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 36(%edx), %esi
+; X86-NEXT:    shldl %cl, %esi, %eax
+; X86-NEXT:    notl %eax
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 32(%edx), %eax
+; X86-NEXT:    shldl %cl, %eax, %esi
+; X86-NEXT:    notl %esi
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 28(%edx), %esi
+; X86-NEXT:    shldl %cl, %esi, %eax
+; X86-NEXT:    notl %eax
+; X86-NEXT:    andl %ebx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 24(%edx), %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    shldl %cl, %eax, %esi
+; X86-NEXT:    notl %esi
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 4(%edx), %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 8(%edx), %eax
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    shldl %cl, %esi, %edx
+; X86-NEXT:    notl %edx
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 12(%ebx), %edx
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    shldl %cl, %eax, %esi
+; X86-NEXT:    notl %esi
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 16(%ebx), %eax
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    shldl %cl, %edx, %esi
+; X86-NEXT:    notl %esi
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 20(%ebx), %edx
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    shldl %cl, %eax, %esi
+; X86-NEXT:    notl %esi
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    notl %eax
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    negl %eax
+; X86-NEXT:    movl 208(%esp,%eax), %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    notl %eax
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shldl %cl, %eax, %edx
+; X86-NEXT:    notl %edx
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl (%ebx), %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    notl %eax
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl %cl, %edi, %ebx
+; X86-NEXT:    notl %ebx
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:    movl %ebx, %edi
+; X86-NEXT:    movl 8(%ebp), %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    movl %ebx, 24(%ecx)
+; X86-NEXT:    movl %esi, 20(%ecx)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %esi, 16(%ecx)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %esi, 12(%ecx)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %esi, 8(%ecx)
+; X86-NEXT:    movl %edi, 4(%ecx)
+; X86-NEXT:    movl %eax, (%ecx)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, 28(%ecx)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, 32(%ecx)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, 36(%ecx)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, 40(%ecx)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, 44(%ecx)
+; X86-NEXT:    movl %edx, 48(%ecx)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, 52(%ecx)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, 56(%ecx)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, 60(%ecx)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    leal -12(%ebp), %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl
+;
+; SSE-LABEL: blsr_u512:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pushq %r15
+; SSE-NEXT:    pushq %r14
+; SSE-NEXT:    pushq %r12
+; SSE-NEXT:    pushq %rbx
+; SSE-NEXT:    pushq %rax
+; SSE-NEXT:    movq 56(%rdi), %rcx
+; SSE-NEXT:    movq 48(%rdi), %rdx
+; SSE-NEXT:    movq 40(%rdi), %rsi
+; SSE-NEXT:    movq 32(%rdi), %r11
+; SSE-NEXT:    movq 24(%rdi), %r8
+; SSE-NEXT:    movq 16(%rdi), %r9
+; SSE-NEXT:    movq (%rdi), %rax
+; SSE-NEXT:    movq 8(%rdi), %r10
+; SSE-NEXT:    rep bsfq %rax, %rbx
+; SSE-NEXT:    rep bsfq %r10, %r14
+; SSE-NEXT:    addq $64, %r14
+; SSE-NEXT:    testq %rax, %rax
+; SSE-NEXT:    cmovneq %rbx, %r14
+; SSE-NEXT:    rep bsfq %r9, %r15
+; SSE-NEXT:    rep bsfq %r8, %rbx
+; SSE-NEXT:    addq $64, %rbx
+; SSE-NEXT:    testq %r9, %r9
+; SSE-NEXT:    cmovneq %r15, %rbx
+; SSE-NEXT:    subq $-128, %rbx
+; SSE-NEXT:    movq %rax, %r15
+; SSE-NEXT:    movq %rax, %r12
+; SSE-NEXT:    orq %r10, %r12
+; SSE-NEXT:    cmovneq %r14, %rbx
+; SSE-NEXT:    rep bsfq %r11, %r12
+; SSE-NEXT:    rep bsfq %rsi, %r14
+; SSE-NEXT:    addq $64, %r14
+; SSE-NEXT:    testq %r11, %r11
+; SSE-NEXT:    cmovneq %r12, %r14
+; SSE-NEXT:    xorps %xmm0, %xmm0
+; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
+; SSE-NEXT:    rep bsfq %rdx, %r12
+; SSE-NEXT:    movl $64, %eax
+; SSE-NEXT:    rep bsfq %rcx, %rax
+; SSE-NEXT:    addq $64, %rax
+; SSE-NEXT:    testq %rdx, %rdx
+; SSE-NEXT:    cmovneq %r12, %rax
+; SSE-NEXT:    subq $-128, %rax
+; SSE-NEXT:    movq %r11, -{{[0-9]+}}(%rsp)
+; SSE-NEXT:    orq %rsi, %r11
+; SSE-NEXT:    cmovneq %r14, %rax
+; SSE-NEXT:    addq $256, %rax # imm = 0x100
+; SSE-NEXT:    movq %r10, -{{[0-9]+}}(%rsp)
+; SSE-NEXT:    orq %r8, %r10
+; SSE-NEXT:    orq %r9, %r15
+; SSE-NEXT:    orq %r10, %r15
+; SSE-NEXT:    cmovneq %rbx, %rax
+; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp)
+; SSE-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
+; SSE-NEXT:    movq %rsi, -{{[0-9]+}}(%rsp)
+; SSE-NEXT:    movq %r8, -{{[0-9]+}}(%rsp)
+; SSE-NEXT:    movq %r9, -{{[0-9]+}}(%rsp)
+; SSE-NEXT:    movl %eax, %ecx
+; SSE-NEXT:    andl $32, %ecx
+; SSE-NEXT:    movl %eax, %edx
+; SSE-NEXT:    andl $480, %edx # imm = 0x1E0
+; SSE-NEXT:    shrl $3, %edx
+; SSE-NEXT:    movl %edx, %esi
+; SSE-NEXT:    andl $-8, %esi
+; SSE-NEXT:    movq -128(%rsp,%rsi), %r8
+; SSE-NEXT:    shrq %cl, %r8
+; SSE-NEXT:    movl -120(%rsp,%rsi), %esi
+; SSE-NEXT:    addl %esi, %esi
+; SSE-NEXT:    notl %ecx
+; SSE-NEXT:    # kill: def $cl killed $cl killed $ecx
+; SSE-NEXT:    shlq %cl, %rsi
+; SSE-NEXT:    orl %r8d, %esi
+; SSE-NEXT:    btrl %eax, %esi
+; SSE-NEXT:    movl %esi, (%rdi,%rdx)
+; SSE-NEXT:    # kill: def $eax killed $eax killed $rax
+; SSE-NEXT:    addq $8, %rsp
+; SSE-NEXT:    popq %rbx
+; SSE-NEXT:    popq %r12
+; SSE-NEXT:    popq %r14
+; SSE-NEXT:    popq %r15
+; SSE-NEXT:    retq
+;
+; AVX2-LABEL: blsr_u512:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    pushq %r15
+; AVX2-NEXT:    pushq %r14
+; AVX2-NEXT:    pushq %r13
+; AVX2-NEXT:    pushq %r12
+; AVX2-NEXT:    pushq %rbx
+; AVX2-NEXT:    movq 56(%rdi), %rcx
+; AVX2-NEXT:    movq 40(%rdi), %rdx
+; AVX2-NEXT:    movq 32(%rd...
[truncated]

``````````



https://github.com/llvm/llvm-project/pull/168356


More information about the llvm-commits mailing list